1 /* GIMPLE store merging and byte swapping passes.
2 Copyright (C) 2009-2020 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
24
25 For example, if we have a sequence performing four byte stores to
26 consecutive memory locations:
27 [p ] := imm1;
28 [p + 1B] := imm2;
29 [p + 2B] := imm3;
30 [p + 3B] := imm4;
31 we can transform this into a single 4-byte store if the target supports it:
32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
33
34 Or:
35 [p ] := [q ];
36 [p + 1B] := [q + 1B];
37 [p + 2B] := [q + 2B];
38 [p + 3B] := [q + 3B];
39 if there is no overlap, this can be transformed into a single 4-byte
40 load followed by a single 4-byte store.
41
42 Or:
43 [p ] := [q ] ^ imm1;
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 if there is no overlap, this can be transformed into a single 4-byte
48 load, XORed with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49
50 Or:
51 [p:1 ] := imm;
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55
56 The algorithm is applied to each basic block in three phases:
57
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
64 terminate such chains when appropriate (for example when the stored
65 values get used subsequently).
66 These stores can be the result of structure element initializers, array
67 stores, etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
70 Each store has up to 2 operands, which can be either a constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
73 in a store_operand_info struct.
74
75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76 store_immediate_info objects) and coalesce contiguous stores into
77 merged_store_group objects. For bit-field stores, we don't need to
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
80 between adjacent stores, such as one store storing a constant and the
81 following one storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
83
84 For example, given the stores:
85 [p ] := 0;
86 [p + 1B] := 1;
87 [p + 3B] := 0;
88 [p + 4B] := 1;
89 [p + 5B] := 0;
90 [p + 6B] := 0;
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
94
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that corresponds to them. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
103
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 [p ] := 0x1234;
107 [p + 2B] := 0x5678;
108 [p + 4B] := 0xab;
109 [p + 5B] := 0xcd;
110
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
112 p |LE|BE|
113 ---------
114 0 |34|12|
115 1 |12|34|
116 2 |78|56|
117 3 |56|78|
118 4 |ab|ab|
119 5 |cd|cd|
120
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
125
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
129
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
136
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
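
/* As a purely illustrative source-level example (not taken from any
   particular testcase), a sequence of narrow constant stores such as:

     struct S { unsigned char a, b, c, d; };

     void
     init_s (struct S *p)
     {
       p->a = 1;
       p->b = 2;
       p->c = 3;
       p->d = 4;
     }

   can, on a target that allows (or can cheaply emulate) unaligned 4-byte
   stores, be emitted as a single 32-bit store of 0x04030201 on little-endian
   or 0x01020304 on big-endian.  */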
140
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
160 #include "timevar.h"
161 #include "cfganal.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
164 #include "except.h"
165 #include "tree-eh.h"
166 #include "target.h"
167 #include "gimplify-me.h"
168 #include "rtl.h"
169 #include "expr.h" /* For get_bit_range. */
170 #include "optabs-tree.h"
171 #include "dbgcnt.h"
172 #include "selftest.h"
173
174 /* The maximum size (in bits) of the stores this pass should generate. */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
177
178 /* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180 #define MAX_STORE_ALIAS_CHECKS 64
181
182 namespace {
183
184 struct bswap_stat
185 {
186 /* Number of hand-written 16-bit nop / bswaps found. */
187 int found_16bit;
188
189 /* Number of hand-written 32-bit nop / bswaps found. */
190 int found_32bit;
191
192 /* Number of hand-written 64-bit nop / bswaps found. */
193 int found_64bit;
194 } nop_stats, bswap_stats;
195
196 /* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
198 number consisting of BITS_PER_MARKER sized markers tracking where each
199 byte comes from in the source:
200
201 0 - target byte has the value 0
202 FF - target byte has an unknown value (e.g. due to sign extension)
203 1..size - marker value is one plus the byte index in the source (1 for lsb).
204
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - the number N_OPS of expressions bitwise ORed together, representing an
213 approximate cost of the computation.
214
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219 time a range of 1.
220
221 Note 2: for non-memory sources, range holds the same value as size.
222
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
224
225 struct symbolic_number {
226 uint64_t n;
227 tree type;
228 tree base_addr;
229 tree offset;
230 poly_int64_pod bytepos;
231 tree src;
232 tree alias_set;
233 tree vuse;
234 unsigned HOST_WIDE_INT range;
235 int n_ops;
236 };
237
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243
244 /* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
249
250 /* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
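
/* For illustration (the values follow from the definitions above): for a
   4-byte source the markers of an untouched value are 0x04030201, i.e.
   CMPNOP masked down to 4 markers, meaning result byte 0 comes from source
   byte 0, byte 1 from byte 1, and so on.  A hand-written 32-bit byte swap
   such as

     (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24)

   permutes the markers into 0x01020304, which is CMPXCHG narrowed to
   4 markers, and is therefore recognized as a bswap.  */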
255
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
259
260 inline bool
261 do_shift_rotate (enum tree_code code,
262 struct symbolic_number *n,
263 int count)
264 {
265 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266 uint64_t head_marker;
267
268 if (count < 0
269 || count >= TYPE_PRECISION (n->type)
270 || count % BITS_PER_UNIT != 0)
271 return false;
272 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
273
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size < 64 / BITS_PER_MARKER)
277 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
278
279 switch (code)
280 {
281 case LSHIFT_EXPR:
282 n->n <<= count;
283 break;
284 case RSHIFT_EXPR:
285 head_marker = HEAD_MARKER (n->n, size);
286 n->n >>= count;
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n->type) && head_marker)
289 for (i = 0; i < count / BITS_PER_MARKER; i++)
290 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size - 1 - i) * BITS_PER_MARKER);
292 break;
293 case LROTATE_EXPR:
294 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295 break;
296 case RROTATE_EXPR:
297 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298 break;
299 default:
300 return false;
301 }
302 /* Zero unused bits for size. */
303 if (size < 64 / BITS_PER_MARKER)
304 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305 return true;
306 }
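
/* For example (illustrative only): with a 4-byte value whose markers are
   0x04030201, do_shift_rotate (RSHIFT_EXPR, n, 8) shifts out one marker and
   yields 0x00040302 for an unsigned type.  For a signed type with a non-zero
   head marker the vacated top marker becomes MARKER_BYTE_UNKNOWN instead,
   giving 0xff040302, because an arithmetic shift replicates a sign bit whose
   origin we cannot track.  */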
307
308 /* Perform sanity checking for the symbolic number N and the gimple
309 statement STMT. */
310
311 inline bool
312 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
313 {
314 tree lhs_type;
315
316 lhs_type = gimple_expr_type (stmt);
317
318 if (TREE_CODE (lhs_type) != INTEGER_TYPE
319 && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320 return false;
321
322 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323 return false;
324
325 return true;
326 }
327
328 /* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
330
331 bool
332 init_symbolic_number (struct symbolic_number *n, tree src)
333 {
334 int size;
335
336 if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
337 return false;
338
339 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340 n->src = src;
341
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to that size and the
344 lowest order byte to 1. */
345 n->type = TREE_TYPE (src);
346 size = TYPE_PRECISION (n->type);
347 if (size % BITS_PER_UNIT != 0)
348 return false;
349 size /= BITS_PER_UNIT;
350 if (size > 64 / BITS_PER_MARKER)
351 return false;
352 n->range = size;
353 n->n = CMPNOP;
354 n->n_ops = 1;
355
356 if (size < 64 / BITS_PER_MARKER)
357 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
358
359 return true;
360 }
361
362 /* Check if STMT might be a byte swap or a nop from a memory source and return
363 the answer. If so, REF is that memory source and the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
365
366 bool
367 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
368 {
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
371 poly_int64 bitsize, bitpos, bytepos;
372 machine_mode mode;
373 int unsignedp, reversep, volatilep;
374 tree offset, base_addr;
375
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378 return false;
379
380 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381 return false;
382
383 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 &unsignedp, &reversep, &volatilep);
385
386 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387 /* Do not rewrite TARGET_MEM_REF. */
388 return false;
389 else if (TREE_CODE (base_addr) == MEM_REF)
390 {
391 poly_offset_int bit_offset = 0;
392 tree off = TREE_OPERAND (base_addr, 1);
393
394 if (!integer_zerop (off))
395 {
396 poly_offset_int boff = mem_ref_offset (base_addr);
397 boff <<= LOG2_BITS_PER_UNIT;
398 bit_offset += boff;
399 }
400
401 base_addr = TREE_OPERAND (base_addr, 0);
402
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
404 if (maybe_lt (bit_offset, 0))
405 {
406 tree byte_offset = wide_int_to_tree
407 (sizetype, bits_to_bytes_round_down (bit_offset));
408 bit_offset = num_trailing_bits (bit_offset);
409 if (offset)
410 offset = size_binop (PLUS_EXPR, offset, byte_offset);
411 else
412 offset = byte_offset;
413 }
414
415 bitpos += bit_offset.force_shwi ();
416 }
417 else
418 base_addr = build_fold_addr_expr (base_addr);
419
420 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421 return false;
422 if (!multiple_p (bitsize, BITS_PER_UNIT))
423 return false;
424 if (reversep)
425 return false;
426
427 if (!init_symbolic_number (n, ref))
428 return false;
429 n->base_addr = base_addr;
430 n->offset = offset;
431 n->bytepos = bytepos;
432 n->alias_set = reference_alias_ptr_type (ref);
433 n->vuse = gimple_vuse (stmt);
434 return true;
435 }
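
/* As a rough illustration (hypothetical statement, details elided): for a
   load such as _1 = a[3], where a is a char array, the symbolic number would
   record base_addr == &a and bytepos == 3, with n initialized as for any
   1-byte source (a single marker of value 1).  Loads from the same base at
   different byte positions can then be combined by perform_symbolic_merge
   below.  */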
436
437 /* Compute the symbolic number N representing the result of a bitwise OR on the
438 two symbolic numbers N1 and N2 whose source statements are respectively
439 SOURCE_STMT1 and SOURCE_STMT2. */
440
441 gimple *
442 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 gimple *source_stmt2, struct symbolic_number *n2,
444 struct symbolic_number *n)
445 {
446 int i, size;
447 uint64_t mask;
448 gimple *source_stmt;
449 struct symbolic_number *n_start;
450
451 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452 if (TREE_CODE (rhs1) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454 rhs1 = TREE_OPERAND (rhs1, 0);
455 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456 if (TREE_CODE (rhs2) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458 rhs2 = TREE_OPERAND (rhs2, 0);
459
460 /* Sources are different, cancel bswap if they are not memory locations with
461 the same base (array, structure, ...). */
462 if (rhs1 != rhs2)
463 {
464 uint64_t inc;
465 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466 struct symbolic_number *toinc_n_ptr, *n_end;
467 basic_block bb1, bb2;
468
469 if (!n1->base_addr || !n2->base_addr
470 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 return NULL;
472
473 if (!n1->offset != !n2->offset
474 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 return NULL;
476
477 start1 = 0;
478 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 return NULL;
480
481 if (start1 < start2)
482 {
483 n_start = n1;
484 start_sub = start2 - start1;
485 }
486 else
487 {
488 n_start = n2;
489 start_sub = start1 - start2;
490 }
491
492 bb1 = gimple_bb (source_stmt1);
493 bb2 = gimple_bb (source_stmt2);
494 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 source_stmt = source_stmt1;
496 else
497 source_stmt = source_stmt2;
498
499 /* Find the highest address at which a load is performed and
500 compute related info. */
501 end1 = start1 + (n1->range - 1);
502 end2 = start2 + (n2->range - 1);
503 if (end1 < end2)
504 {
505 end = end2;
506 end_sub = end2 - end1;
507 }
508 else
509 {
510 end = end1;
511 end_sub = end1 - end2;
512 }
513 n_end = (end2 > end1) ? n2 : n1;
514
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN)
517 toinc_n_ptr = (n_end == n1) ? n2 : n1;
518 else
519 toinc_n_ptr = (n_start == n1) ? n2 : n1;
520
521 n->range = end - MIN (start1, start2) + 1;
522
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n->range > 64 / BITS_PER_MARKER)
526 return NULL;
527
528 /* Reinterpret the byte marks in the symbolic number holding the value of
529 bigger weight, according to target endianness. */
530 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
533 {
534 unsigned marker
535 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 if (marker && marker != MARKER_BYTE_UNKNOWN)
537 toinc_n_ptr->n += inc;
538 }
539 }
540 else
541 {
542 n->range = n1->range;
543 n_start = n1;
544 source_stmt = source_stmt1;
545 }
546
547 if (!n1->alias_set
548 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549 n->alias_set = n1->alias_set;
550 else
551 n->alias_set = ptr_type_node;
552 n->vuse = n_start->vuse;
553 n->base_addr = n_start->base_addr;
554 n->offset = n_start->offset;
555 n->src = n_start->src;
556 n->bytepos = n_start->bytepos;
557 n->type = n_start->type;
558 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559
560 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
561 {
562 uint64_t masked1, masked2;
563
564 masked1 = n1->n & mask;
565 masked2 = n2->n & mask;
566 if (masked1 && masked2 && masked1 != masked2)
567 return NULL;
568 }
569 n->n = n1->n | n2->n;
570 n->n_ops = n1->n_ops + n2->n_ops;
571
572 return source_stmt;
573 }
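
/* A small worked example (illustrative only): on a little-endian target,
   merging n1 for a[0] (markers 0x01, bytepos 0) with n2 for a[1] << 8
   (markers 0x0100, bytepos 1) first rebases the markers of the number with
   the higher start, turning 0x0100 into 0x0200, and then ORs the two into
   0x0201 with a range of 2.  Repeating this for a[2] << 16 and a[3] << 24
   eventually yields 0x04030201, i.e. CMPNOP for a 4-byte value.  */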
574
575 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
576 the operation given by the rhs of STMT on the result. If the operation
577 could successfully be executed the function returns a gimple stmt whose
578 rhs's first tree is the expression of the source operand; otherwise it
579 returns NULL. */
580
581 gimple *
582 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
583 {
584 enum tree_code code;
585 tree rhs1, rhs2 = NULL;
586 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
587 enum gimple_rhs_class rhs_class;
588
589 if (!limit || !is_gimple_assign (stmt))
590 return NULL;
591
592 rhs1 = gimple_assign_rhs1 (stmt);
593
594 if (find_bswap_or_nop_load (stmt, rhs1, n))
595 return stmt;
596
597 /* Handle BIT_FIELD_REF. */
598 if (TREE_CODE (rhs1) == BIT_FIELD_REF
599 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
600 {
601 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
602 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
603 if (bitpos % BITS_PER_UNIT == 0
604 && bitsize % BITS_PER_UNIT == 0
605 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
606 {
607 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
608 if (BYTES_BIG_ENDIAN)
609 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
610
611 /* Shift. */
612 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
613 return NULL;
614
615 /* Mask. */
616 uint64_t mask = 0;
617 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
618 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
619 i++, tmp <<= BITS_PER_UNIT)
620 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
621 n->n &= mask;
622
623 /* Convert. */
624 n->type = TREE_TYPE (rhs1);
625 if (!n->base_addr)
626 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
627
628 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
629 }
630
631 return NULL;
632 }
633
634 if (TREE_CODE (rhs1) != SSA_NAME)
635 return NULL;
636
637 code = gimple_assign_rhs_code (stmt);
638 rhs_class = gimple_assign_rhs_class (stmt);
639 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
640
641 if (rhs_class == GIMPLE_BINARY_RHS)
642 rhs2 = gimple_assign_rhs2 (stmt);
643
644 /* Handle unary rhs and binary rhs with integer constants as second
645 operand. */
646
647 if (rhs_class == GIMPLE_UNARY_RHS
648 || (rhs_class == GIMPLE_BINARY_RHS
649 && TREE_CODE (rhs2) == INTEGER_CST))
650 {
651 if (code != BIT_AND_EXPR
652 && code != LSHIFT_EXPR
653 && code != RSHIFT_EXPR
654 && code != LROTATE_EXPR
655 && code != RROTATE_EXPR
656 && !CONVERT_EXPR_CODE_P (code))
657 return NULL;
658
659 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
660
661 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
662 we have to initialize the symbolic number. */
663 if (!source_stmt1)
664 {
665 if (gimple_assign_load_p (stmt)
666 || !init_symbolic_number (n, rhs1))
667 return NULL;
668 source_stmt1 = stmt;
669 }
670
671 switch (code)
672 {
673 case BIT_AND_EXPR:
674 {
675 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
676 uint64_t val = int_cst_value (rhs2), mask = 0;
677 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
678
679 /* Only constants masking full bytes are allowed. */
680 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
681 if ((val & tmp) != 0 && (val & tmp) != tmp)
682 return NULL;
683 else if (val & tmp)
684 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
685
686 n->n &= mask;
687 }
688 break;
689 case LSHIFT_EXPR:
690 case RSHIFT_EXPR:
691 case LROTATE_EXPR:
692 case RROTATE_EXPR:
693 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
694 return NULL;
695 break;
696 CASE_CONVERT:
697 {
698 int i, type_size, old_type_size;
699 tree type;
700
701 type = gimple_expr_type (stmt);
702 type_size = TYPE_PRECISION (type);
703 if (type_size % BITS_PER_UNIT != 0)
704 return NULL;
705 type_size /= BITS_PER_UNIT;
706 if (type_size > 64 / BITS_PER_MARKER)
707 return NULL;
708
709 /* Sign extension: result is dependent on the value. */
710 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
711 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
712 && HEAD_MARKER (n->n, old_type_size))
713 for (i = 0; i < type_size - old_type_size; i++)
714 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
715 << ((type_size - 1 - i) * BITS_PER_MARKER);
716
717 if (type_size < 64 / BITS_PER_MARKER)
718 {
719 /* If STMT casts to a smaller type mask out the bits not
720 belonging to the target type. */
721 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
722 }
723 n->type = type;
724 if (!n->base_addr)
725 n->range = type_size;
726 }
727 break;
728 default:
729 return NULL;
730 };
731 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
732 }
733
734 /* Handle binary rhs. */
735
736 if (rhs_class == GIMPLE_BINARY_RHS)
737 {
738 struct symbolic_number n1, n2;
739 gimple *source_stmt, *source_stmt2;
740
741 if (code != BIT_IOR_EXPR)
742 return NULL;
743
744 if (TREE_CODE (rhs2) != SSA_NAME)
745 return NULL;
746
747 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
748
749 switch (code)
750 {
751 case BIT_IOR_EXPR:
752 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
753
754 if (!source_stmt1)
755 return NULL;
756
757 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
758
759 if (!source_stmt2)
760 return NULL;
761
762 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
763 return NULL;
764
765 if (n1.vuse != n2.vuse)
766 return NULL;
767
768 source_stmt
769 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
770
771 if (!source_stmt)
772 return NULL;
773
774 if (!verify_symbolic_number_p (n, stmt))
775 return NULL;
776
777 break;
778 default:
779 return NULL;
780 }
781 return source_stmt;
782 }
783 return NULL;
784 }
785
786 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
787 *CMPXCHG, *CMPNOP and adjust *N. */
788
789 void
790 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
791 uint64_t *cmpnop)
792 {
793 unsigned rsize;
794 uint64_t tmpn, mask;
795
796 /* The number which the find_bswap_or_nop_1 result should match in order
797 to have a full byte swap. The number is shifted to the right
798 according to the size of the symbolic number before using it. */
799 *cmpxchg = CMPXCHG;
800 *cmpnop = CMPNOP;
801
802 /* Find real size of result (highest non-zero byte). */
803 if (n->base_addr)
804 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
805 else
806 rsize = n->range;
807
808 /* Zero out the bits corresponding to untouched bytes in original gimple
809 expression. */
810 if (n->range < (int) sizeof (int64_t))
811 {
812 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
813 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
814 *cmpnop &= mask;
815 }
816
817 /* Zero out the bits corresponding to unused bytes in the result of the
818 gimple expression. */
819 if (rsize < n->range)
820 {
821 if (BYTES_BIG_ENDIAN)
822 {
823 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
824 *cmpxchg &= mask;
825 if (n->range - rsize == sizeof (int64_t))
826 *cmpnop = 0;
827 else
828 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
829 }
830 else
831 {
832 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
833 if (n->range - rsize == sizeof (int64_t))
834 *cmpxchg = 0;
835 else
836 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
837 *cmpnop &= mask;
838 }
839 n->range = rsize;
840 }
841
842 n->range *= BITS_PER_UNIT;
843 }
844
845 /* Check if STMT completes a bswap implementation or a read in a given
846 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
847 accordingly. It also sets N to represent the kind of operations
848 performed: size of the resulting expression and whether it works on
849 a memory source, and if so alias-set and vuse. Finally, the
850 function returns a stmt whose rhs's first tree is the source
851 expression. */
852
853 gimple *
854 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
855 {
856 /* The last parameter determines the depth search limit. It usually
857 correlates directly to the number n of bytes to be touched. We
858 increase that number by 2 * (log2(n) + 1) here in order to also
859 cover signed -> unsigned conversions of the src operand as can be seen
860 in libgcc, and for initial shift/and operation of the src operand. */
861 int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
862 limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
863 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
864
865 if (!ins_stmt)
866 return NULL;
867
868 uint64_t cmpxchg, cmpnop;
869 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
870
871 /* A complete byte swap should make the symbolic number start with
872 the largest digit in the highest order byte. An unchanged symbolic
873 number indicates a read with the same endianness as the target architecture. */
874 if (n->n == cmpnop)
875 *bswap = false;
876 else if (n->n == cmpxchg)
877 *bswap = true;
878 else
879 return NULL;
880
881 /* Useless bit manipulation performed by code. */
882 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
883 return NULL;
884
885 return ins_stmt;
886 }
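
/* For illustration (hypothetical source, not from a testcase): given

     uint32_t v = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24);

   where a is an unsigned char array, the markers computed for the whole
   expression match CMPNOP, so *BSWAP is set to false and the pass can
   replace the expression by a plain 32-bit load on little-endian (on
   big-endian the markers match CMPXCHG instead, giving a load + bswap).
   Conversely, a shift/mask/or sequence that reverses the byte order of a
   value matches CMPXCHG and sets *BSWAP to true.  */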
887
888 const pass_data pass_data_optimize_bswap =
889 {
890 GIMPLE_PASS, /* type */
891 "bswap", /* name */
892 OPTGROUP_NONE, /* optinfo_flags */
893 TV_NONE, /* tv_id */
894 PROP_ssa, /* properties_required */
895 0, /* properties_provided */
896 0, /* properties_destroyed */
897 0, /* todo_flags_start */
898 0, /* todo_flags_finish */
899 };
900
901 class pass_optimize_bswap : public gimple_opt_pass
902 {
903 public:
904 pass_optimize_bswap (gcc::context *ctxt)
905 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
906 {}
907
908 /* opt_pass methods: */
909 virtual bool gate (function *)
910 {
911 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
912 }
913
914 virtual unsigned int execute (function *);
915
916 }; // class pass_optimize_bswap
917
918 /* Perform the bswap optimization: replace the expression computed in the rhs
919 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
920 bswap, load or load + bswap expression.
921 Which of these alternatives replaces the rhs is given by N->base_addr (non
922 null if a load is needed) and BSWAP. The type, VUSE and alias-set of the
923 load to perform are also given in N while the builtin bswap to invoke is
924 given in FNDECL. Finally, if a load is involved, INS_STMT refers to one of
925 the load statements involved to construct the rhs in gsi_stmt (GSI) and
926 N->range gives the size of the rhs expression for maintaining some
927 statistics.
928
929 Note that if the replacement involves a load and if gsi_stmt (GSI) is
930 non-NULL, that stmt is moved just after INS_STMT to do the load with the
931 same VUSE, which can lead to gsi_stmt (GSI) changing basic block. */
932
933 tree
934 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
935 tree bswap_type, tree load_type, struct symbolic_number *n,
936 bool bswap)
937 {
938 tree src, tmp, tgt = NULL_TREE;
939 gimple *bswap_stmt;
940
941 gimple *cur_stmt = gsi_stmt (gsi);
942 src = n->src;
943 if (cur_stmt)
944 tgt = gimple_assign_lhs (cur_stmt);
945
946 /* Need to load the value from memory first. */
947 if (n->base_addr)
948 {
949 gimple_stmt_iterator gsi_ins = gsi;
950 if (ins_stmt)
951 gsi_ins = gsi_for_stmt (ins_stmt);
952 tree addr_expr, addr_tmp, val_expr, val_tmp;
953 tree load_offset_ptr, aligned_load_type;
954 gimple *load_stmt;
955 unsigned align = get_object_alignment (src);
956 poly_int64 load_offset = 0;
957
958 if (cur_stmt)
959 {
960 basic_block ins_bb = gimple_bb (ins_stmt);
961 basic_block cur_bb = gimple_bb (cur_stmt);
962 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
963 return NULL_TREE;
964
965 /* Move cur_stmt just before one of the loads of the original
966 sequence to ensure it has the same VUSE. See PR61517 for what could
967 go wrong. */
968 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
969 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
970 gsi_move_before (&gsi, &gsi_ins);
971 gsi = gsi_for_stmt (cur_stmt);
972 }
973 else
974 gsi = gsi_ins;
975
976 /* Compute address to load from and cast according to the size
977 of the load. */
978 addr_expr = build_fold_addr_expr (src);
979 if (is_gimple_mem_ref_addr (addr_expr))
980 addr_tmp = unshare_expr (addr_expr);
981 else
982 {
983 addr_tmp = unshare_expr (n->base_addr);
984 if (!is_gimple_mem_ref_addr (addr_tmp))
985 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
986 is_gimple_mem_ref_addr,
987 NULL_TREE, true,
988 GSI_SAME_STMT);
989 load_offset = n->bytepos;
990 if (n->offset)
991 {
992 tree off
993 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
994 true, NULL_TREE, true,
995 GSI_SAME_STMT);
996 gimple *stmt
997 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
998 POINTER_PLUS_EXPR, addr_tmp, off);
999 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1000 addr_tmp = gimple_assign_lhs (stmt);
1001 }
1002 }
1003
1004 /* Perform the load. */
1005 aligned_load_type = load_type;
1006 if (align < TYPE_ALIGN (load_type))
1007 aligned_load_type = build_aligned_type (load_type, align);
1008 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1009 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1010 load_offset_ptr);
1011
1012 if (!bswap)
1013 {
1014 if (n->range == 16)
1015 nop_stats.found_16bit++;
1016 else if (n->range == 32)
1017 nop_stats.found_32bit++;
1018 else
1019 {
1020 gcc_assert (n->range == 64);
1021 nop_stats.found_64bit++;
1022 }
1023
1024 /* Convert the result of load if necessary. */
1025 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1026 {
1027 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1028 "load_dst");
1029 load_stmt = gimple_build_assign (val_tmp, val_expr);
1030 gimple_set_vuse (load_stmt, n->vuse);
1031 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1032 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
1033 update_stmt (cur_stmt);
1034 }
1035 else if (cur_stmt)
1036 {
1037 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1038 gimple_set_vuse (cur_stmt, n->vuse);
1039 update_stmt (cur_stmt);
1040 }
1041 else
1042 {
1043 tgt = make_ssa_name (load_type);
1044 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1045 gimple_set_vuse (cur_stmt, n->vuse);
1046 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1047 }
1048
1049 if (dump_file)
1050 {
1051 fprintf (dump_file,
1052 "%d bit load in target endianness found at: ",
1053 (int) n->range);
1054 print_gimple_stmt (dump_file, cur_stmt, 0);
1055 }
1056 return tgt;
1057 }
1058 else
1059 {
1060 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1061 load_stmt = gimple_build_assign (val_tmp, val_expr);
1062 gimple_set_vuse (load_stmt, n->vuse);
1063 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1064 }
1065 src = val_tmp;
1066 }
1067 else if (!bswap)
1068 {
1069 gimple *g = NULL;
1070 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1071 {
1072 if (!is_gimple_val (src))
1073 return NULL_TREE;
1074 g = gimple_build_assign (tgt, NOP_EXPR, src);
1075 }
1076 else if (cur_stmt)
1077 g = gimple_build_assign (tgt, src);
1078 else
1079 tgt = src;
1080 if (n->range == 16)
1081 nop_stats.found_16bit++;
1082 else if (n->range == 32)
1083 nop_stats.found_32bit++;
1084 else
1085 {
1086 gcc_assert (n->range == 64);
1087 nop_stats.found_64bit++;
1088 }
1089 if (dump_file)
1090 {
1091 fprintf (dump_file,
1092 "%d bit reshuffle in target endianness found at: ",
1093 (int) n->range);
1094 if (cur_stmt)
1095 print_gimple_stmt (dump_file, cur_stmt, 0);
1096 else
1097 {
1098 print_generic_expr (dump_file, tgt, TDF_NONE);
1099 fprintf (dump_file, "\n");
1100 }
1101 }
1102 if (cur_stmt)
1103 gsi_replace (&gsi, g, true);
1104 return tgt;
1105 }
1106 else if (TREE_CODE (src) == BIT_FIELD_REF)
1107 src = TREE_OPERAND (src, 0);
1108
1109 if (n->range == 16)
1110 bswap_stats.found_16bit++;
1111 else if (n->range == 32)
1112 bswap_stats.found_32bit++;
1113 else
1114 {
1115 gcc_assert (n->range == 64);
1116 bswap_stats.found_64bit++;
1117 }
1118
1119 tmp = src;
1120
1121 /* Convert the src expression if necessary. */
1122 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1123 {
1124 gimple *convert_stmt;
1125
1126 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1127 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1128 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1129 }
1130
1131 /* The canonical form for a 16-bit bswap is a rotate expression. Only 16-bit
1132 values are handled this way, as a rotation of a 2N-bit value by N bits is
1133 generally not equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1134 gives 0x03040102 while a bswap for that value is 0x04030201. */
1135 if (bswap && n->range == 16)
1136 {
1137 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1138 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1139 bswap_stmt = gimple_build_assign (NULL, src);
1140 }
1141 else
1142 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1143
1144 if (tgt == NULL_TREE)
1145 tgt = make_ssa_name (bswap_type);
1146 tmp = tgt;
1147
1148 /* Convert the result if necessary. */
1149 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1150 {
1151 gimple *convert_stmt;
1152
1153 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1154 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
1155 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1156 }
1157
1158 gimple_set_lhs (bswap_stmt, tmp);
1159
1160 if (dump_file)
1161 {
1162 fprintf (dump_file, "%d bit bswap implementation found at: ",
1163 (int) n->range);
1164 if (cur_stmt)
1165 print_gimple_stmt (dump_file, cur_stmt, 0);
1166 else
1167 {
1168 print_generic_expr (dump_file, tgt, TDF_NONE);
1169 fprintf (dump_file, "\n");
1170 }
1171 }
1172
1173 if (cur_stmt)
1174 {
1175 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1176 gsi_remove (&gsi, true);
1177 }
1178 else
1179 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1180 return tgt;
1181 }
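
/* Roughly (illustrative GIMPLE; the exact SSA names, types and casts depend
   on the input), for a detected 32-bit byte swap of a value loaded from
   memory, bswap_replace emits something like:

     load_dst_1 = MEM <unsigned int> [(char *)p_2];
     _3 = __builtin_bswap32 (load_dst_1);

   whereas a 16-bit byte swap is emitted in its canonical rotate form,
   e.g. _3 = x_1 r<< 8, instead of a builtin call.  */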
1182
1183 /* Find manual byte swap implementations as well as loads in a given
1184 endianness. Byte swaps are turned into a bswap builtin invocation,
1185 while endian loads are converted to a bswap builtin invocation or a
1186 simple load according to the target endianness. */
1187
1188 unsigned int
1189 pass_optimize_bswap::execute (function *fun)
1190 {
1191 basic_block bb;
1192 bool bswap32_p, bswap64_p;
1193 bool changed = false;
1194 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1195
1196 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1197 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1198 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1199 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1200 || (bswap32_p && word_mode == SImode)));
1201
1202 /* Determine the argument type of the builtins. The code later on
1203 assumes that the return and argument type are the same. */
1204 if (bswap32_p)
1205 {
1206 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1207 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1208 }
1209
1210 if (bswap64_p)
1211 {
1212 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1213 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1214 }
1215
1216 memset (&nop_stats, 0, sizeof (nop_stats));
1217 memset (&bswap_stats, 0, sizeof (bswap_stats));
1218 calculate_dominance_info (CDI_DOMINATORS);
1219
1220 FOR_EACH_BB_FN (bb, fun)
1221 {
1222 gimple_stmt_iterator gsi;
1223
1224 /* We do a reverse scan for bswap patterns to make sure we get the
1225 widest match. As bswap pattern matching doesn't handle previously
1226 inserted smaller bswap replacements as sub-patterns, the wider
1227 variant wouldn't be detected. */
1228 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1229 {
1230 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1231 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1232 enum tree_code code;
1233 struct symbolic_number n;
1234 bool bswap;
1235
1236 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1237 might be moved to a different basic block by bswap_replace and gsi
1238 must not point to it if that's the case. Moving the gsi_prev
1239 there makes sure that gsi points to the statement previous to
1240 cur_stmt while still making sure that all statements are
1241 considered in this basic block. */
1242 gsi_prev (&gsi);
1243
1244 if (!is_gimple_assign (cur_stmt))
1245 continue;
1246
1247 code = gimple_assign_rhs_code (cur_stmt);
1248 switch (code)
1249 {
1250 case LROTATE_EXPR:
1251 case RROTATE_EXPR:
1252 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1253 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1254 % BITS_PER_UNIT)
1255 continue;
1256 /* Fall through. */
1257 case BIT_IOR_EXPR:
1258 break;
1259 default:
1260 continue;
1261 }
1262
1263 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
1264
1265 if (!ins_stmt)
1266 continue;
1267
1268 switch (n.range)
1269 {
1270 case 16:
1271 /* Already in canonical form, nothing to do. */
1272 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1273 continue;
1274 load_type = bswap_type = uint16_type_node;
1275 break;
1276 case 32:
1277 load_type = uint32_type_node;
1278 if (bswap32_p)
1279 {
1280 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1281 bswap_type = bswap32_type;
1282 }
1283 break;
1284 case 64:
1285 load_type = uint64_type_node;
1286 if (bswap64_p)
1287 {
1288 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1289 bswap_type = bswap64_type;
1290 }
1291 break;
1292 default:
1293 continue;
1294 }
1295
1296 if (bswap && !fndecl && n.range != 16)
1297 continue;
1298
1299 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1300 bswap_type, load_type, &n, bswap))
1301 changed = true;
1302 }
1303 }
1304
1305 statistics_counter_event (fun, "16-bit nop implementations found",
1306 nop_stats.found_16bit);
1307 statistics_counter_event (fun, "32-bit nop implementations found",
1308 nop_stats.found_32bit);
1309 statistics_counter_event (fun, "64-bit nop implementations found",
1310 nop_stats.found_64bit);
1311 statistics_counter_event (fun, "16-bit bswap implementations found",
1312 bswap_stats.found_16bit);
1313 statistics_counter_event (fun, "32-bit bswap implementations found",
1314 bswap_stats.found_32bit);
1315 statistics_counter_event (fun, "64-bit bswap implementations found",
1316 bswap_stats.found_64bit);
1317
1318 return (changed ? TODO_update_ssa : 0);
1319 }
1320
1321 } // anon namespace
1322
1323 gimple_opt_pass *
1324 make_pass_optimize_bswap (gcc::context *ctxt)
1325 {
1326 return new pass_optimize_bswap (ctxt);
1327 }
1328
1329 namespace {
1330
1331 /* Struct recording one operand for the store, which is either a constant,
1332 then VAL represents the constant and all the other fields are zero, or
1333 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1334 and the other fields also reflect the memory load, or an SSA name, then
1335 VAL represents the SSA name and all the other fields are zero. */
1336
1337 class store_operand_info
1338 {
1339 public:
1340 tree val;
1341 tree base_addr;
1342 poly_uint64 bitsize;
1343 poly_uint64 bitpos;
1344 poly_uint64 bitregion_start;
1345 poly_uint64 bitregion_end;
1346 gimple *stmt;
1347 bool bit_not_p;
1348 store_operand_info ();
1349 };
1350
1351 store_operand_info::store_operand_info ()
1352 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1353 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1354 {
1355 }
1356
1357 /* Struct recording the information about a single store of an immediate
1358 to memory. These are created in the first phase and coalesced into
1359 merged_store_group objects in the second phase. */
1360
1361 class store_immediate_info
1362 {
1363 public:
1364 unsigned HOST_WIDE_INT bitsize;
1365 unsigned HOST_WIDE_INT bitpos;
1366 unsigned HOST_WIDE_INT bitregion_start;
1367 /* This is one past the last bit of the bit region. */
1368 unsigned HOST_WIDE_INT bitregion_end;
1369 gimple *stmt;
1370 unsigned int order;
1371 /* INTEGER_CST for constant stores, MEM_REF for memory copy,
1372 BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
1373 for bit insertion.
1374 LROTATE_EXPR if it can only be bswap optimized and the
1375 ops are not really meaningful.
1376 NOP_EXPR if bswap optimization detected identity, ops
1377 are not meaningful. */
1378 enum tree_code rhs_code;
1379 /* Two fields for bswap optimization purposes. */
1380 struct symbolic_number n;
1381 gimple *ins_stmt;
1382 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
1383 bool bit_not_p;
1384 /* True if ops have been swapped and thus ops[1] represents
1385 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1386 bool ops_swapped_p;
1387 /* The index number of the landing pad, or 0 if there is none. */
1388 int lp_nr;
1389 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1390 just the first one. */
1391 store_operand_info ops[2];
1392 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1393 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1394 gimple *, unsigned int, enum tree_code,
1395 struct symbolic_number &, gimple *, bool, int,
1396 const store_operand_info &,
1397 const store_operand_info &);
1398 };
1399
1400 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1401 unsigned HOST_WIDE_INT bp,
1402 unsigned HOST_WIDE_INT brs,
1403 unsigned HOST_WIDE_INT bre,
1404 gimple *st,
1405 unsigned int ord,
1406 enum tree_code rhscode,
1407 struct symbolic_number &nr,
1408 gimple *ins_stmtp,
1409 bool bitnotp,
1410 int nr2,
1411 const store_operand_info &op0r,
1412 const store_operand_info &op1r)
1413 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1414 stmt (st), order (ord), rhs_code (rhscode), n (nr),
1415 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1416 lp_nr (nr2)
1417 #if __cplusplus >= 201103L
1418 , ops { op0r, op1r }
1419 {
1420 }
1421 #else
1422 {
1423 ops[0] = op0r;
1424 ops[1] = op1r;
1425 }
1426 #endif
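
/* For illustration (hypothetical values): the store [p + 1B] := 2 from the
   overview comment at the top of this file would be recorded with bitsize 8,
   bitpos 8 relative to the base p, rhs_code INTEGER_CST and ops[0].val
   holding the constant 2; the bit region covers at least that byte.  */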
1427
1428 /* Struct representing a group of stores to contiguous memory locations.
1429 These are produced by the second phase (coalescing) and consumed in the
1430 third phase that outputs the widened stores. */
1431
1432 class merged_store_group
1433 {
1434 public:
1435 unsigned HOST_WIDE_INT start;
1436 unsigned HOST_WIDE_INT width;
1437 unsigned HOST_WIDE_INT bitregion_start;
1438 unsigned HOST_WIDE_INT bitregion_end;
1439 /* The size of the allocated memory for val and mask. */
1440 unsigned HOST_WIDE_INT buf_size;
1441 unsigned HOST_WIDE_INT align_base;
1442 poly_uint64 load_align_base[2];
1443
1444 unsigned int align;
1445 unsigned int load_align[2];
1446 unsigned int first_order;
1447 unsigned int last_order;
1448 bool bit_insertion;
1449 bool only_constants;
1450 unsigned int first_nonmergeable_order;
1451 int lp_nr;
1452
1453 auto_vec<store_immediate_info *> stores;
1454 /* We record the first and last original statements in the sequence because
1455 we'll need their vuse/vdef and replacement position. It's easier to keep
1456 track of them separately as 'stores' is reordered by apply_stores. */
1457 gimple *last_stmt;
1458 gimple *first_stmt;
1459 unsigned char *val;
1460 unsigned char *mask;
1461
1462 merged_store_group (store_immediate_info *);
1463 ~merged_store_group ();
1464 bool can_be_merged_into (store_immediate_info *);
1465 void merge_into (store_immediate_info *);
1466 void merge_overlapping (store_immediate_info *);
1467 bool apply_stores ();
1468 private:
1469 void do_merge (store_immediate_info *);
1470 };
1471
1472 /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1473
1474 static void
1475 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1476 {
1477 if (!fd)
1478 return;
1479
1480 for (unsigned int i = 0; i < len; i++)
1481 fprintf (fd, "%02x ", ptr[i]);
1482 fprintf (fd, "\n");
1483 }
1484
1485 /* Clear out LEN bits starting from bit START in the byte array
1486 PTR. This clears the bits to the *right* from START.
1487 START must be within [0, BITS_PER_UNIT) and counts starting from
1488 the least significant bit. */
1489
1490 static void
1491 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1492 unsigned int len)
1493 {
1494 if (len == 0)
1495 return;
1496 /* Clear len bits to the right of start. */
1497 else if (len <= start + 1)
1498 {
1499 unsigned char mask = (~(~0U << len));
1500 mask = mask << (start + 1U - len);
1501 ptr[0] &= ~mask;
1502 }
1503 else if (start != BITS_PER_UNIT - 1)
1504 {
1505 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1506 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1507 len - (start % BITS_PER_UNIT) - 1);
1508 }
1509 else if (start == BITS_PER_UNIT - 1
1510 && len > BITS_PER_UNIT)
1511 {
1512 unsigned int nbytes = len / BITS_PER_UNIT;
1513 memset (ptr, 0, nbytes);
1514 if (len % BITS_PER_UNIT != 0)
1515 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1516 len % BITS_PER_UNIT);
1517 }
1518 else
1519 gcc_unreachable ();
1520 }
1521
1522 /* In the byte array PTR clear the bit region starting at bit
1523 START that is LEN bits wide.
1524 For regions spanning multiple bytes do this recursively until we reach
1525 zero LEN or a region contained within a single byte. */
1526
1527 static void
1528 clear_bit_region (unsigned char *ptr, unsigned int start,
1529 unsigned int len)
1530 {
1531 /* Degenerate base case. */
1532 if (len == 0)
1533 return;
1534 else if (start >= BITS_PER_UNIT)
1535 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1536 /* Second base case. */
1537 else if ((start + len) <= BITS_PER_UNIT)
1538 {
1539 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1540 mask >>= BITS_PER_UNIT - (start + len);
1541
1542 ptr[0] &= ~mask;
1543
1544 return;
1545 }
1546 /* Clear most significant bits in a byte and proceed with the next byte. */
1547 else if (start != 0)
1548 {
1549 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1550 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1551 }
1552 /* Whole bytes need to be cleared. */
1553 else if (start == 0 && len > BITS_PER_UNIT)
1554 {
1555 unsigned int nbytes = len / BITS_PER_UNIT;
1556 /* We could recurse on each byte but we clear whole bytes, so a simple
1557 memset will do. */
1558 memset (ptr, '\0', nbytes);
1559 /* Clear the remaining sub-byte region if there is one. */
1560 if (len % BITS_PER_UNIT != 0)
1561 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1562 }
1563 else
1564 gcc_unreachable ();
1565 }
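
/* A worked example (illustrative only): clear_bit_region (ptr, 6, 5) on two
   bytes that are initially 0xff, 0xff clears bits 6 and 7 of the first byte
   and bits 0 to 2 of the second one (bit 0 being the least significant bit
   of each byte), leaving 0x3f, 0xf8.  */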
1566
1567 /* Write BITLEN bits of EXPR to the byte array PTR at
1568 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1569 Return true if the operation succeeded. */
1570
1571 static bool
1572 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1573 unsigned int total_bytes)
1574 {
1575 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1576 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1577 || (bitpos % BITS_PER_UNIT)
1578 || !int_mode_for_size (bitlen, 0).exists ());
1579 bool empty_ctor_p
1580 = (TREE_CODE (expr) == CONSTRUCTOR
1581 && CONSTRUCTOR_NELTS (expr) == 0
1582 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1583 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1584
1585 if (!sub_byte_op_p)
1586 {
1587 if (first_byte >= total_bytes)
1588 return false;
1589 total_bytes -= first_byte;
1590 if (empty_ctor_p)
1591 {
1592 unsigned HOST_WIDE_INT rhs_bytes
1593 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1594 if (rhs_bytes > total_bytes)
1595 return false;
1596 memset (ptr + first_byte, '\0', rhs_bytes);
1597 return true;
1598 }
1599 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1600 }
1601
1602 /* LITTLE-ENDIAN
1603 We are writing a non byte-sized quantity or at a position that is not
1604 at a byte boundary.
1605 |--------|--------|--------| ptr + first_byte
1606 ^ ^
1607 xxx xxxxxxxx xxx< bp>
1608 |______EXPR____|
1609
1610 First native_encode_expr EXPR into a temporary buffer and shift each
1611 byte in the buffer by 'bp' (carrying the bits over as necessary).
1612 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1613 <------bitlen---->< bp>
1614 Then we clear the destination bits:
1615 |---00000|00000000|000-----| ptr + first_byte
1616 <-------bitlen--->< bp>
1617
1618 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1619 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1620
1621 BIG-ENDIAN
1622 We are writing a non byte-sized quantity or at a position that is not
1623 at a byte boundary.
1624 ptr + first_byte |--------|--------|--------|
1625 ^ ^
1626 <bp >xxx xxxxxxxx xxx
1627 |_____EXPR_____|
1628
1629 First native_encode_expr EXPR into a temporary buffer and shift each
1630 byte in the buffer to the right (carrying the bits over as necessary).
1631 We shift by as much as needed to align the most significant bit of EXPR
1632 with bitpos:
1633 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1634 <---bitlen----> <bp ><-----bitlen----->
1635 Then we clear the destination bits:
1636 ptr + first_byte |-----000||00000000||00000---|
1637 <bp ><-------bitlen----->
1638
1639 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1640 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1641 The awkwardness comes from the fact that bitpos is counted from the
1642 most significant bit of a byte. */
1643
1644 /* We must be dealing with fixed-size data at this point, since the
1645 total size is also fixed. */
1646 unsigned int byte_size;
1647 if (empty_ctor_p)
1648 {
1649 unsigned HOST_WIDE_INT rhs_bytes
1650 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1651 if (rhs_bytes > total_bytes)
1652 return false;
1653 byte_size = rhs_bytes;
1654 }
1655 else
1656 {
1657 fixed_size_mode mode
1658 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1659 byte_size = GET_MODE_SIZE (mode);
1660 }
1661 /* Allocate an extra byte so that we have space to shift into. */
1662 byte_size++;
1663 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1664 memset (tmpbuf, '\0', byte_size);
1665 /* The store detection code should only have allowed constants that are
1666 accepted by native_encode_expr or empty ctors. */
1667 if (!empty_ctor_p
1668 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1669 gcc_unreachable ();
1670
1671 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1672 bytes to write. This means it can write more than
1673 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1674 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1675 bitlen and zero out the bits that are not relevant as well (that may
1676 contain a sign bit due to sign-extension). */
1677 unsigned int padding
1678 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1679 /* On big-endian the padding is at the 'front' so just skip the initial
1680 bytes. */
1681 if (BYTES_BIG_ENDIAN)
1682 tmpbuf += padding;
1683
1684 byte_size -= padding;
1685
1686 if (bitlen % BITS_PER_UNIT != 0)
1687 {
1688 if (BYTES_BIG_ENDIAN)
1689 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1690 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1691 else
1692 clear_bit_region (tmpbuf, bitlen,
1693 byte_size * BITS_PER_UNIT - bitlen);
1694 }
1695 /* Left shifting relies on the last byte being clear if bitlen is
1696 a multiple of BITS_PER_UNIT, which might not be clear if
1697 there are padding bytes. */
1698 else if (!BYTES_BIG_ENDIAN)
1699 tmpbuf[byte_size - 1] = '\0';
1700
1701 /* Clear the bit region in PTR where the bits from TMPBUF will be
1702 inserted into. */
1703 if (BYTES_BIG_ENDIAN)
1704 clear_bit_region_be (ptr + first_byte,
1705 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1706 else
1707 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1708
1709 int shift_amnt;
1710 int bitlen_mod = bitlen % BITS_PER_UNIT;
1711 int bitpos_mod = bitpos % BITS_PER_UNIT;
1712
1713 bool skip_byte = false;
1714 if (BYTES_BIG_ENDIAN)
1715 {
1716 /* BITPOS and BITLEN are exactly aligned and no shifting
1717 is necessary. */
1718 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1719 || (bitpos_mod == 0 && bitlen_mod == 0))
1720 shift_amnt = 0;
1721 /* |. . . . . . . .|
1722 <bp > <blen >.
1723 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1724 of the value until it aligns with 'bp' in the next byte over. */
1725 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1726 {
1727 shift_amnt = bitlen_mod + bitpos_mod;
1728 skip_byte = bitlen_mod != 0;
1729 }
1730 /* |. . . . . . . .|
1731 <----bp--->
1732 <---blen---->.
1733 Shift the value right within the same byte so it aligns with 'bp'. */
1734 else
1735 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1736 }
1737 else
1738 shift_amnt = bitpos % BITS_PER_UNIT;
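/* For instance (assuming BITS_PER_UNIT == 8): on big-endian,
   bitpos % 8 == 2 and bitlen % 8 == 3 give shift_amnt == 5 with
   skip_byte set, while bitpos % 8 == 3 and bitlen % 8 == 6 give
   shift_amnt == 1.  On little-endian, bitpos == 11 gives
   shift_amnt == 3.  */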
1739
1740 /* Create the shifted version of EXPR. */
1741 if (!BYTES_BIG_ENDIAN)
1742 {
1743 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
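/* When shift_amnt is zero no bits were carried into the extra byte
   allocated above, so exclude it from the bytes ORed into PTR below.  */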
1744 if (shift_amnt == 0)
1745 byte_size--;
1746 }
1747 else
1748 {
1749 gcc_assert (BYTES_BIG_ENDIAN);
1750 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1751 /* If shifting right forced us to move into the next byte skip the now
1752 empty byte. */
1753 if (skip_byte)
1754 {
1755 tmpbuf++;
1756 byte_size--;
1757 }
1758 }
1759
1760 /* Insert the bits from TMPBUF. */
1761 for (unsigned int i = 0; i < byte_size; i++)
1762 ptr[first_byte + i] |= tmpbuf[i];
1763
1764 return true;
1765 }
1766
1767 /* Sorting function for store_immediate_info objects.
1768 Sorts them by bitposition. */
1769
1770 static int
1771 sort_by_bitpos (const void *x, const void *y)
1772 {
1773 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1774 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1775
1776 if ((*tmp)->bitpos < (*tmp2)->bitpos)
1777 return -1;
1778 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1779 return 1;
1780 else
1781 /* If they are the same let's use the order which is guaranteed to
1782 be different. */
1783 return (*tmp)->order - (*tmp2)->order;
1784 }
1785
1786 /* Sorting function for store_immediate_info objects.
1787 Sorts them by the order field. */
1788
1789 static int
1790 sort_by_order (const void *x, const void *y)
1791 {
1792 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1793 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1794
1795 if ((*tmp)->order < (*tmp2)->order)
1796 return -1;
1797 else if ((*tmp)->order > (*tmp2)->order)
1798 return 1;
1799
1800 gcc_unreachable ();
1801 }
1802
1803 /* Initialize a merged_store_group object from a store_immediate_info
1804 object. */
1805
1806 merged_store_group::merged_store_group (store_immediate_info *info)
1807 {
1808 start = info->bitpos;
1809 width = info->bitsize;
1810 bitregion_start = info->bitregion_start;
1811 bitregion_end = info->bitregion_end;
1812 /* VAL has memory allocated for it in apply_stores once the group
1813 width has been finalized. */
1814 val = NULL;
1815 mask = NULL;
1816 bit_insertion = false;
1817 only_constants = info->rhs_code == INTEGER_CST;
1818 first_nonmergeable_order = ~0U;
1819 lp_nr = info->lp_nr;
1820 unsigned HOST_WIDE_INT align_bitpos = 0;
1821 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1822 &align, &align_bitpos);
1823 align_base = start - align_bitpos;
1824 for (int i = 0; i < 2; ++i)
1825 {
1826 store_operand_info &op = info->ops[i];
1827 if (op.base_addr == NULL_TREE)
1828 {
1829 load_align[i] = 0;
1830 load_align_base[i] = 0;
1831 }
1832 else
1833 {
1834 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1835 load_align_base[i] = op.bitpos - align_bitpos;
1836 }
1837 }
1838 stores.create (1);
1839 stores.safe_push (info);
1840 last_stmt = info->stmt;
1841 last_order = info->order;
1842 first_stmt = last_stmt;
1843 first_order = last_order;
1844 buf_size = 0;
1845 }
1846
1847 merged_store_group::~merged_store_group ()
1848 {
1849 if (val)
1850 XDELETEVEC (val);
1851 }
1852
1853 /* Return true if the store described by INFO can be merged into the group. */
1854
1855 bool
1856 merged_store_group::can_be_merged_into (store_immediate_info *info)
1857 {
1858 /* Do not merge bswap patterns. */
1859 if (info->rhs_code == LROTATE_EXPR)
1860 return false;
1861
1862 if (info->lp_nr != lp_nr)
1863 return false;
1864
1865 /* The canonical case. */
1866 if (info->rhs_code == stores[0]->rhs_code)
1867 return true;
1868
1869 /* BIT_INSERT_EXPR is compatible with INTEGER_CST. */
1870 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1871 return true;
1872
1873 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1874 return true;
1875
1876 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
1877 if (info->rhs_code == MEM_REF
1878 && (stores[0]->rhs_code == INTEGER_CST
1879 || stores[0]->rhs_code == BIT_INSERT_EXPR)
1880 && info->bitregion_start == stores[0]->bitregion_start
1881 && info->bitregion_end == stores[0]->bitregion_end)
1882 return true;
1883
1884 if (stores[0]->rhs_code == MEM_REF
1885 && (info->rhs_code == INTEGER_CST
1886 || info->rhs_code == BIT_INSERT_EXPR)
1887 && info->bitregion_start == stores[0]->bitregion_start
1888 && info->bitregion_end == stores[0]->bitregion_end)
1889 return true;
1890
1891 return false;
1892 }
1893
1894 /* Helper method for merge_into and merge_overlapping to do
1895 the common part. */
1896
1897 void
1898 merged_store_group::do_merge (store_immediate_info *info)
1899 {
1900 bitregion_start = MIN (bitregion_start, info->bitregion_start);
1901 bitregion_end = MAX (bitregion_end, info->bitregion_end);
1902
1903 unsigned int this_align;
1904 unsigned HOST_WIDE_INT align_bitpos = 0;
1905 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1906 &this_align, &align_bitpos);
1907 if (this_align > align)
1908 {
1909 align = this_align;
1910 align_base = info->bitpos - align_bitpos;
1911 }
1912 for (int i = 0; i < 2; ++i)
1913 {
1914 store_operand_info &op = info->ops[i];
1915 if (!op.base_addr)
1916 continue;
1917
1918 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1919 if (this_align > load_align[i])
1920 {
1921 load_align[i] = this_align;
1922 load_align_base[i] = op.bitpos - align_bitpos;
1923 }
1924 }
1925
1926 gimple *stmt = info->stmt;
1927 stores.safe_push (info);
1928 if (info->order > last_order)
1929 {
1930 last_order = info->order;
1931 last_stmt = stmt;
1932 }
1933 else if (info->order < first_order)
1934 {
1935 first_order = info->order;
1936 first_stmt = stmt;
1937 }
1938 if (info->rhs_code != INTEGER_CST)
1939 only_constants = false;
1940 }
1941
1942 /* Merge a store recorded by INFO into this merged store.
1943 The store is not overlapping with the existing recorded
1944 stores. */
1945
1946 void
1947 merged_store_group::merge_into (store_immediate_info *info)
1948 {
1949 /* Make sure we're inserting in the position we think we're inserting. */
1950 gcc_assert (info->bitpos >= start + width
1951 && info->bitregion_start <= bitregion_end);
1952
1953 width = info->bitpos + info->bitsize - start;
1954 do_merge (info);
1955 }
1956
1957 /* Merge a store described by INFO into this merged store.
1958 INFO overlaps in some way with the current store (i.e. it's not contiguous
1959 which is handled by merged_store_group::merge_into). */
1960
1961 void
1962 merged_store_group::merge_overlapping (store_immediate_info *info)
1963 {
1964 /* If the store extends the size of the group, extend the width. */
1965 if (info->bitpos + info->bitsize > start + width)
1966 width = info->bitpos + info->bitsize - start;
1967
1968 do_merge (info);
1969 }
1970
1971 /* Go through all the recorded stores in this group in program order and
1972 apply their values to the VAL byte array to create the final merged
1973 value. Return true if the operation succeeded. */
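/* A small illustrative instance (not from the sources): two single-byte
   constant stores of 0x12 at bit 0 and 0x34 at bit 8 into a 2-byte
   bitregion produce val == { 0x12, 0x34 } and mask == { 0x00, 0x00 },
   since MASK starts as all ones and the bits covered by each store are
   cleared.  */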
1974
1975 bool
1976 merged_store_group::apply_stores ()
1977 {
1978 /* Make sure we have more than one store in the group, otherwise we cannot
1979 merge anything. */
1980 if (bitregion_start % BITS_PER_UNIT != 0
1981 || bitregion_end % BITS_PER_UNIT != 0
1982 || stores.length () == 1)
1983 return false;
1984
1985 stores.qsort (sort_by_order);
1986 store_immediate_info *info;
1987 unsigned int i;
1988 /* Create a power-of-2-sized buffer for native_encode_expr. */
1989 buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
1990 val = XNEWVEC (unsigned char, 2 * buf_size);
1991 mask = val + buf_size;
1992 memset (val, 0, buf_size);
1993 memset (mask, ~0U, buf_size);
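/* For example, a 48-bit (6-byte) bitregion gets buf_size == 8 because
   1 << ceil_log2 (6) == 8; VAL occupies the first half of the
   allocation and MASK the second half.  */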
1994
1995 FOR_EACH_VEC_ELT (stores, i, info)
1996 {
1997 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
1998 tree cst;
1999 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2000 cst = info->ops[0].val;
2001 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2002 cst = info->ops[1].val;
2003 else
2004 cst = NULL_TREE;
2005 bool ret = true;
2006 if (cst)
2007 {
2008 if (info->rhs_code == BIT_INSERT_EXPR)
2009 bit_insertion = true;
2010 else
2011 ret = encode_tree_to_bitpos (cst, val, info->bitsize,
2012 pos_in_buffer, buf_size);
2013 }
2014 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2015 if (BYTES_BIG_ENDIAN)
2016 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2017 - (pos_in_buffer % BITS_PER_UNIT)),
2018 info->bitsize);
2019 else
2020 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2021 if (cst && dump_file && (dump_flags & TDF_DETAILS))
2022 {
2023 if (ret)
2024 {
2025 fputs ("After writing ", dump_file);
2026 print_generic_expr (dump_file, cst, TDF_NONE);
2027 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2028 " at position %d\n", info->bitsize, pos_in_buffer);
2029 fputs (" the merged value contains ", dump_file);
2030 dump_char_array (dump_file, val, buf_size);
2031 fputs (" the merged mask contains ", dump_file);
2032 dump_char_array (dump_file, mask, buf_size);
2033 if (bit_insertion)
2034 fputs (" bit insertion is required\n", dump_file);
2035 }
2036 else
2037 fprintf (dump_file, "Failed to merge stores\n");
2038 }
2039 if (!ret)
2040 return false;
2041 }
2042 stores.qsort (sort_by_bitpos);
2043 return true;
2044 }
2045
2046 /* Structure describing the store chain. */
2047
2048 class imm_store_chain_info
2049 {
2050 public:
2051 /* Doubly-linked list that imposes an order on chain processing.
2052 PNXP (prev's next pointer) points to the head of a list, or to
2053 the next field in the previous chain in the list.
2054 See pass_store_merging::m_stores_head for more rationale. */
2055 imm_store_chain_info *next, **pnxp;
2056 tree base_addr;
2057 auto_vec<store_immediate_info *> m_store_info;
2058 auto_vec<merged_store_group *> m_merged_store_groups;
2059
2060 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2061 : next (inspt), pnxp (&inspt), base_addr (b_a)
2062 {
2063 inspt = this;
2064 if (next)
2065 {
2066 gcc_checking_assert (pnxp == next->pnxp);
2067 next->pnxp = &next;
2068 }
2069 }
2070 ~imm_store_chain_info ()
2071 {
2072 *pnxp = next;
2073 if (next)
2074 {
2075 gcc_checking_assert (&next == next->pnxp);
2076 next->pnxp = pnxp;
2077 }
2078 }
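/* For example, constructing chain A and then chain B on the same head
   pointer yields head -> B -> A with B->pnxp == &head and
   A->pnxp == &B->next; destroying B unlinks it and restores A->pnxp
   to &head.  */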
2079 bool terminate_and_process_chain ();
2080 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2081 unsigned int);
2082 bool coalesce_immediate_stores ();
2083 bool output_merged_store (merged_store_group *);
2084 bool output_merged_stores ();
2085 };
2086
2087 const pass_data pass_data_tree_store_merging = {
2088 GIMPLE_PASS, /* type */
2089 "store-merging", /* name */
2090 OPTGROUP_NONE, /* optinfo_flags */
2091 TV_GIMPLE_STORE_MERGING, /* tv_id */
2092 PROP_ssa, /* properties_required */
2093 0, /* properties_provided */
2094 0, /* properties_destroyed */
2095 0, /* todo_flags_start */
2096 TODO_update_ssa, /* todo_flags_finish */
2097 };
2098
2099 class pass_store_merging : public gimple_opt_pass
2100 {
2101 public:
2102 pass_store_merging (gcc::context *ctxt)
2103 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
2104 {
2105 }
2106
2107 /* Pass not supported for PDP-endian, nor for insane hosts or
2108 target character sizes where native_{encode,interpret}_expr
2109 doesn't work properly. */
2110 virtual bool
2111 gate (function *)
2112 {
2113 return flag_store_merging
2114 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2115 && CHAR_BIT == 8
2116 && BITS_PER_UNIT == 8;
2117 }
2118
2119 virtual unsigned int execute (function *);
2120
2121 private:
2122 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2123
2124 /* Form a doubly-linked stack of the elements of m_stores, so that
2125 we can iterate over them in a predictable way. Using this order
2126 avoids extraneous differences in the compiler output just because
2127 of tree pointer variations (e.g. different chains end up in
2128 different positions of m_stores, so they are handled in different
2129 orders, so they allocate or release SSA names in different
2130 orders, and when they get reused, subsequent passes end up
2131 getting different SSA names, which may ultimately change
2132 decisions when going out of SSA). */
2133 imm_store_chain_info *m_stores_head;
2134
2135 bool process_store (gimple *);
2136 bool terminate_and_process_chain (imm_store_chain_info *);
2137 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2138 bool terminate_and_process_all_chains ();
2139 }; // class pass_store_merging
2140
2141 /* Terminate and process all recorded chains. Return true if any changes
2142 were made. */
2143
2144 bool
2145 pass_store_merging::terminate_and_process_all_chains ()
2146 {
2147 bool ret = false;
2148 while (m_stores_head)
2149 ret |= terminate_and_process_chain (m_stores_head);
2150 gcc_assert (m_stores.is_empty ());
2151 return ret;
2152 }
2153
2154 /* Terminate all chains that are affected by the statement STMT.
2155 CHAIN_INFO is the chain we should ignore from the checks if
2156 non-NULL. Return true if any changes were made. */
2157
2158 bool
2159 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2160 **chain_info,
2161 gimple *stmt)
2162 {
2163 bool ret = false;
2164
2165 /* If the statement doesn't touch memory it can't alias. */
2166 if (!gimple_vuse (stmt))
2167 return false;
2168
2169 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2170 ao_ref store_lhs_ref;
2171 ao_ref_init (&store_lhs_ref, store_lhs);
2172 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2173 {
2174 next = cur->next;
2175
2176 /* We already checked all the stores in chain_info and terminated the
2177 chain if necessary. Skip it here. */
2178 if (chain_info && *chain_info == cur)
2179 continue;
2180
2181 store_immediate_info *info;
2182 unsigned int i;
2183 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2184 {
2185 tree lhs = gimple_assign_lhs (info->stmt);
2186 ao_ref lhs_ref;
2187 ao_ref_init (&lhs_ref, lhs);
2188 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2189 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2190 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2191 &lhs_ref, false)))
2192 {
2193 if (dump_file && (dump_flags & TDF_DETAILS))
2194 {
2195 fprintf (dump_file, "stmt causes chain termination:\n");
2196 print_gimple_stmt (dump_file, stmt, 0);
2197 }
2198 ret |= terminate_and_process_chain (cur);
2199 break;
2200 }
2201 }
2202 }
2203
2204 return ret;
2205 }
2206
2207 /* Helper function. Terminate the recorded chain storing to base object
2208 BASE. Return true if the merging and output was successful. The m_stores
2209 entry is removed after the processing in any case. */
2210
2211 bool
2212 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2213 {
2214 bool ret = chain_info->terminate_and_process_chain ();
2215 m_stores.remove (chain_info->base_addr);
2216 delete chain_info;
2217 return ret;
2218 }
2219
2220 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2221 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2222 be able to sink a load of REF across stores between FIRST and LAST, up
2223 to right before LAST. */
2224
2225 bool
2226 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2227 {
2228 ao_ref r;
2229 ao_ref_init (&r, ref);
2230 unsigned int count = 0;
2231 tree vop = gimple_vdef (last);
2232 gimple *stmt;
2233
2234 /* Return true conservatively if the basic blocks are different. */
2235 if (gimple_bb (first) != gimple_bb (last))
2236 return true;
2237
2238 do
2239 {
2240 stmt = SSA_NAME_DEF_STMT (vop);
2241 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2242 return true;
2243 if (gimple_store_p (stmt)
2244 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2245 return true;
2246 /* Avoid quadratic compile time by bounding the number of checks
2247 we perform. */
2248 if (++count > MAX_STORE_ALIAS_CHECKS)
2249 return true;
2250 vop = gimple_vuse (stmt);
2251 }
2252 while (stmt != first);
2253
2254 return false;
2255 }
2256
2257 /* Return true if INFO->ops[IDX] is mergeable with the
2258 corresponding loads already in MERGED_STORE group.
2259 BASE_ADDR is the base address of the whole store group. */
2260
2261 bool
2262 compatible_load_p (merged_store_group *merged_store,
2263 store_immediate_info *info,
2264 tree base_addr, int idx)
2265 {
2266 store_immediate_info *infof = merged_store->stores[0];
2267 if (!info->ops[idx].base_addr
2268 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2269 info->bitpos - infof->bitpos)
2270 || !operand_equal_p (info->ops[idx].base_addr,
2271 infof->ops[idx].base_addr, 0))
2272 return false;
2273
2274 store_immediate_info *infol = merged_store->stores.last ();
2275 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2276 /* In this case all vuses should be the same, e.g.
2277 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2278 or
2279 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2280 and we can emit the coalesced load next to any of those loads. */
2281 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2282 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2283 return true;
2284
2285 /* Otherwise, at least for now require that the load has the same
2286 vuse as the store. See following examples. */
2287 if (gimple_vuse (info->stmt) != load_vuse)
2288 return false;
2289
2290 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2291 || (infof != infol
2292 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2293 return false;
2294
2295 /* If the load is from the same location as the store, already
2296 the construction of the immediate chain info guarantees no intervening
2297 stores, so no further checks are needed. Example:
2298 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2299 if (known_eq (info->ops[idx].bitpos, info->bitpos)
2300 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2301 return true;
2302
2303 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2304 of the stores in the group, or any other stores in between those.
2305 Previous calls to compatible_load_p ensured that for all the
2306 merged_store->stores IDX loads, no stmts starting with
2307 merged_store->first_stmt and ending right before merged_store->last_stmt
2308 clobbers those loads. */
2309 gimple *first = merged_store->first_stmt;
2310 gimple *last = merged_store->last_stmt;
2311 unsigned int i;
2312 store_immediate_info *infoc;
2313 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2314 comes before the so far first load, we'll be changing
2315 merged_store->first_stmt. In that case we need to give up if
2316 any of the earlier processed loads are clobbered by the stmts in the new
2317 range. */
2318 if (info->order < merged_store->first_order)
2319 {
2320 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2321 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2322 return false;
2323 first = info->stmt;
2324 }
2325 /* Similarly, we could change merged_store->last_stmt, so ensure
2326 in that case no stmts in the new range clobber any of the earlier
2327 processed loads. */
2328 else if (info->order > merged_store->last_order)
2329 {
2330 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2331 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2332 return false;
2333 last = info->stmt;
2334 }
2335 /* And finally, we'd be adding a new load to the set, ensure it isn't
2336 clobbered in the new range. */
2337 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2338 return false;
2339
2340 /* Otherwise, we are looking for:
2341 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2342 or
2343 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2344 return true;
2345 }
2346
2347 /* Add all refs loaded to compute VAL to REFS vector. */
2348
2349 void
2350 gather_bswap_load_refs (vec<tree> *refs, tree val)
2351 {
2352 if (TREE_CODE (val) != SSA_NAME)
2353 return;
2354
2355 gimple *stmt = SSA_NAME_DEF_STMT (val);
2356 if (!is_gimple_assign (stmt))
2357 return;
2358
2359 if (gimple_assign_load_p (stmt))
2360 {
2361 refs->safe_push (gimple_assign_rhs1 (stmt));
2362 return;
2363 }
2364
2365 switch (gimple_assign_rhs_class (stmt))
2366 {
2367 case GIMPLE_BINARY_RHS:
2368 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2369 /* FALLTHRU */
2370 case GIMPLE_UNARY_RHS:
2371 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2372 break;
2373 default:
2374 gcc_unreachable ();
2375 }
2376 }
2377
2378 /* Check if there are any stores in M_STORE_INFO after index I
2379 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2380 a potential group ending with END that have their order
2381 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2382 all the stores already merged and the one under consideration
2383 have rhs_code of INTEGER_CST. Return true if there are no such stores.
2384 Consider:
2385 MEM[(long long int *)p_28] = 0;
2386 MEM[(long long int *)p_28 + 8B] = 0;
2387 MEM[(long long int *)p_28 + 16B] = 0;
2388 MEM[(long long int *)p_28 + 24B] = 0;
2389 _129 = (int) _130;
2390 MEM[(int *)p_28 + 8B] = _129;
2391 MEM[(int *)p_28].a = -1;
2392 We already have
2393 MEM[(long long int *)p_28] = 0;
2394 MEM[(int *)p_28].a = -1;
2395 stmts in the current group and need to consider if it is safe to
2396 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2397 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2398 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2399 into the group and merging of those 3 stores is successful, merged
2400 stmts will be emitted at the latest store from that group, i.e.
2401 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2402 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2403 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2404 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2405 into the group. That way it will be its own store group and will
2406 not be touched. If ALL_INTEGER_CST_P and there are overlapping
2407 INTEGER_CST stores, those are mergeable using merge_overlapping,
2408 so don't return false for those.
2409
2410 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2411 (exclusive), whether they don't overlap the bitrange START to END
2412 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2413 prevent merging in cases like:
2414 MEM <char[12]> [&b + 8B] = {};
2415 MEM[(short *) &b] = 5;
2416 _5 = *x_4(D);
2417 MEM <long long unsigned int> [&b + 2B] = _5;
2418 MEM[(char *)&b + 16B] = 88;
2419 MEM[(int *)&b + 20B] = 1;
2420 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2421 be merged with it, because the = _5 store overlaps these and is in between
2422 them in sort_by_order ordering. If it was merged, the merged store would
2423 go after the = _5 store and thus change behavior. */
2424
2425 static bool
2426 check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
2427 bool all_integer_cst_p, unsigned int first_order,
2428 unsigned int last_order, unsigned HOST_WIDE_INT start,
2429 unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2430 unsigned end_earlier)
2431 {
2432 unsigned int len = m_store_info.length ();
2433 for (unsigned int j = first_earlier; j < end_earlier; j++)
2434 {
2435 store_immediate_info *info = m_store_info[j];
2436 if (info->order > first_order
2437 && info->order < last_order
2438 && info->bitpos + info->bitsize > start)
2439 return false;
2440 }
2441 for (++i; i < len; ++i)
2442 {
2443 store_immediate_info *info = m_store_info[i];
2444 if (info->bitpos >= end)
2445 break;
2446 if (info->order < last_order
2447 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2448 return false;
2449 }
2450 return true;
2451 }
2452
2453 /* Return true if m_store_info[first] and at least one following store
2454 form a group which stores a try_size-bit value that is byte swapped
2455 from a memory load or some other value, or is the identity of some value.
2456 This uses the bswap pass APIs. */
2457
2458 bool
2459 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2460 unsigned int first,
2461 unsigned int try_size,
2462 unsigned int first_earlier)
2463 {
2464 unsigned int len = m_store_info.length (), last = first;
2465 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2466 if (width >= try_size)
2467 return false;
2468 for (unsigned int i = first + 1; i < len; ++i)
2469 {
2470 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2471 || m_store_info[i]->lp_nr != merged_store->lp_nr
2472 || m_store_info[i]->ins_stmt == NULL)
2473 return false;
2474 width += m_store_info[i]->bitsize;
2475 if (width >= try_size)
2476 {
2477 last = i;
2478 break;
2479 }
2480 }
2481 if (width != try_size)
2482 return false;
2483
2484 bool allow_unaligned
2485 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2486 /* Punt if the combined store would not be aligned and we need alignment. */
2487 if (!allow_unaligned)
2488 {
2489 unsigned int align = merged_store->align;
2490 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2491 for (unsigned int i = first + 1; i <= last; ++i)
2492 {
2493 unsigned int this_align;
2494 unsigned HOST_WIDE_INT align_bitpos = 0;
2495 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2496 &this_align, &align_bitpos);
2497 if (this_align > align)
2498 {
2499 align = this_align;
2500 align_base = m_store_info[i]->bitpos - align_bitpos;
2501 }
2502 }
2503 unsigned HOST_WIDE_INT align_bitpos
2504 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2505 if (align_bitpos)
2506 align = least_bit_hwi (align_bitpos);
2507 if (align < try_size)
2508 return false;
2509 }
2510
2511 tree type;
2512 switch (try_size)
2513 {
2514 case 16: type = uint16_type_node; break;
2515 case 32: type = uint32_type_node; break;
2516 case 64: type = uint64_type_node; break;
2517 default: gcc_unreachable ();
2518 }
2519 struct symbolic_number n;
2520 gimple *ins_stmt = NULL;
2521 int vuse_store = -1;
2522 unsigned int first_order = merged_store->first_order;
2523 unsigned int last_order = merged_store->last_order;
2524 gimple *first_stmt = merged_store->first_stmt;
2525 gimple *last_stmt = merged_store->last_stmt;
2526 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2527 store_immediate_info *infof = m_store_info[first];
2528
2529 for (unsigned int i = first; i <= last; ++i)
2530 {
2531 store_immediate_info *info = m_store_info[i];
2532 struct symbolic_number this_n = info->n;
2533 this_n.type = type;
2534 if (!this_n.base_addr)
2535 this_n.range = try_size / BITS_PER_UNIT;
2536 else
2537 /* Update vuse in case it has been changed by output_merged_stores. */
2538 this_n.vuse = gimple_vuse (info->ins_stmt);
2539 unsigned int bitpos = info->bitpos - infof->bitpos;
2540 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2541 BYTES_BIG_ENDIAN
2542 ? try_size - info->bitsize - bitpos
2543 : bitpos))
2544 return false;
2545 if (this_n.base_addr && vuse_store)
2546 {
2547 unsigned int j;
2548 for (j = first; j <= last; ++j)
2549 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2550 break;
2551 if (j > last)
2552 {
2553 if (vuse_store == 1)
2554 return false;
2555 vuse_store = 0;
2556 }
2557 }
2558 if (i == first)
2559 {
2560 n = this_n;
2561 ins_stmt = info->ins_stmt;
2562 }
2563 else
2564 {
2565 if (n.base_addr && n.vuse != this_n.vuse)
2566 {
2567 if (vuse_store == 0)
2568 return false;
2569 vuse_store = 1;
2570 }
2571 if (info->order > last_order)
2572 {
2573 last_order = info->order;
2574 last_stmt = info->stmt;
2575 }
2576 else if (info->order < first_order)
2577 {
2578 first_order = info->order;
2579 first_stmt = info->stmt;
2580 }
2581 end = MAX (end, info->bitpos + info->bitsize);
2582
2583 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2584 &this_n, &n);
2585 if (ins_stmt == NULL)
2586 return false;
2587 }
2588 }
2589
2590 uint64_t cmpxchg, cmpnop;
2591 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2592
2593 /* A complete byte swap should make the symbolic number start with the
2594 largest digit in the highest-order byte. An unchanged symbolic number
2595 indicates a read with the same endianness as the target architecture. */
2596 if (n.n != cmpnop && n.n != cmpxchg)
2597 return false;
2598
2599 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2600 return false;
2601
2602 if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2603 merged_store->start, end, first_earlier, first))
2604 return false;
2605
2606 /* Don't handle memory copy this way if normal non-bswap processing
2607 would handle it too. */
2608 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2609 {
2610 unsigned int i;
2611 for (i = first; i <= last; ++i)
2612 if (m_store_info[i]->rhs_code != MEM_REF)
2613 break;
2614 if (i == last + 1)
2615 return false;
2616 }
2617
2618 if (n.n == cmpxchg)
2619 switch (try_size)
2620 {
2621 case 16:
2622 /* Will emit LROTATE_EXPR. */
2623 break;
2624 case 32:
2625 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2626 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2627 break;
2628 return false;
2629 case 64:
2630 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2631 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2632 break;
2633 return false;
2634 default:
2635 gcc_unreachable ();
2636 }
2637
2638 if (!allow_unaligned && n.base_addr)
2639 {
2640 unsigned int align = get_object_alignment (n.src);
2641 if (align < try_size)
2642 return false;
2643 }
2644
2645 /* If each load has vuse of the corresponding store, need to verify
2646 the loads can be sunk right before the last store. */
2647 if (vuse_store == 1)
2648 {
2649 auto_vec<tree, 64> refs;
2650 for (unsigned int i = first; i <= last; ++i)
2651 gather_bswap_load_refs (&refs,
2652 gimple_assign_rhs1 (m_store_info[i]->stmt));
2653
2654 unsigned int i;
2655 tree ref;
2656 FOR_EACH_VEC_ELT (refs, i, ref)
2657 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2658 return false;
2659 n.vuse = NULL_TREE;
2660 }
2661
2662 infof->n = n;
2663 infof->ins_stmt = ins_stmt;
2664 for (unsigned int i = first; i <= last; ++i)
2665 {
2666 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2667 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2668 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2669 if (i != first)
2670 merged_store->merge_into (m_store_info[i]);
2671 }
2672
2673 return true;
2674 }
2675
2676 /* Go through the candidate stores recorded in m_store_info and merge them
2677 into merged_store_group objects recorded into m_merged_store_groups
2678 representing the widened stores. Return true if coalescing was successful
2679 and the number of widened stores is fewer than the original number
2680 of stores. */
2681
2682 bool
2683 imm_store_chain_info::coalesce_immediate_stores ()
2684 {
2685 /* Anything less can't be processed. */
2686 if (m_store_info.length () < 2)
2687 return false;
2688
2689 if (dump_file && (dump_flags & TDF_DETAILS))
2690 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
2691 m_store_info.length ());
2692
2693 store_immediate_info *info;
2694 unsigned int i, ignore = 0;
2695 unsigned int first_earlier = 0;
2696 unsigned int end_earlier = 0;
2697
2698 /* Order the stores by the bitposition they write to. */
2699 m_store_info.qsort (sort_by_bitpos);
2700
2701 info = m_store_info[0];
2702 merged_store_group *merged_store = new merged_store_group (info);
2703 if (dump_file && (dump_flags & TDF_DETAILS))
2704 fputs ("New store group\n", dump_file);
2705
2706 FOR_EACH_VEC_ELT (m_store_info, i, info)
2707 {
2708 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2709
2710 if (i <= ignore)
2711 goto done;
2712
2713 while (first_earlier < end_earlier
2714 && (m_store_info[first_earlier]->bitpos
2715 + m_store_info[first_earlier]->bitsize
2716 <= merged_store->start))
2717 first_earlier++;
2718
2719 /* First try to handle group of stores like:
2720 p[0] = data >> 24;
2721 p[1] = data >> 16;
2722 p[2] = data >> 8;
2723 p[3] = data;
2724 using the bswap framework. */
2725 if (info->bitpos == merged_store->start + merged_store->width
2726 && merged_store->stores.length () == 1
2727 && merged_store->stores[0]->ins_stmt != NULL
2728 && info->lp_nr == merged_store->lp_nr
2729 && info->ins_stmt != NULL)
2730 {
2731 unsigned int try_size;
2732 for (try_size = 64; try_size >= 16; try_size >>= 1)
2733 if (try_coalesce_bswap (merged_store, i - 1, try_size,
2734 first_earlier))
2735 break;
2736
2737 if (try_size >= 16)
2738 {
2739 ignore = i + merged_store->stores.length () - 1;
2740 m_merged_store_groups.safe_push (merged_store);
2741 if (ignore < m_store_info.length ())
2742 {
2743 merged_store = new merged_store_group (m_store_info[ignore]);
2744 end_earlier = ignore;
2745 }
2746 else
2747 merged_store = NULL;
2748 goto done;
2749 }
2750 }
2751
2752 new_bitregion_start
2753 = MIN (merged_store->bitregion_start, info->bitregion_start);
2754 new_bitregion_end
2755 = MAX (merged_store->bitregion_end, info->bitregion_end);
2756
2757 if (info->order >= merged_store->first_nonmergeable_order
2758 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
2759 > (unsigned) param_store_merging_max_size))
2760 ;
2761
2762 /* |---store 1---|
2763 |---store 2---|
2764 Overlapping stores. */
2765 else if (IN_RANGE (info->bitpos, merged_store->start,
2766 merged_store->start + merged_store->width - 1)
2767 /* |---store 1---||---store 2---|
2768 Handle also the consecutive INTEGER_CST stores case here,
2769 as we have here the code to deal with overlaps. */
2770 || (info->bitregion_start <= merged_store->bitregion_end
2771 && info->rhs_code == INTEGER_CST
2772 && merged_store->only_constants
2773 && merged_store->can_be_merged_into (info)))
2774 {
2775 /* Only allow overlapping stores of constants. */
2776 if (info->rhs_code == INTEGER_CST
2777 && merged_store->only_constants
2778 && info->lp_nr == merged_store->lp_nr)
2779 {
2780 unsigned int first_order
2781 = MIN (merged_store->first_order, info->order);
2782 unsigned int last_order
2783 = MAX (merged_store->last_order, info->order);
2784 unsigned HOST_WIDE_INT end
2785 = MAX (merged_store->start + merged_store->width,
2786 info->bitpos + info->bitsize);
2787 if (check_no_overlap (m_store_info, i, true, first_order,
2788 last_order, merged_store->start, end,
2789 first_earlier, end_earlier))
2790 {
2791 /* check_no_overlap call above made sure there are no
2792 overlapping stores with non-INTEGER_CST rhs_code
2793 in between the first and last of the stores we've
2794 just merged. If there are any INTEGER_CST rhs_code
2795 stores in between, we need to merge_overlapping them
2796 even if in the sort_by_bitpos order there are other
2797 overlapping stores in between. Keep those stores as is.
2798 Example:
2799 MEM[(int *)p_28] = 0;
2800 MEM[(char *)p_28 + 3B] = 1;
2801 MEM[(char *)p_28 + 1B] = 2;
2802 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2803 We can't merge the zero store with the store of two and
2804 not merge anything else, because the store of one is
2805 in the original order in between those two, but in
2806 store_by_bitpos order it comes after the last store that
2807 we can't merge with them. We can merge the first 3 stores
2808 and keep the last store as is though. */
2809 unsigned int len = m_store_info.length ();
2810 unsigned int try_order = last_order;
2811 unsigned int first_nonmergeable_order;
2812 unsigned int k;
2813 bool last_iter = false;
2814 int attempts = 0;
2815 do
2816 {
2817 unsigned int max_order = 0;
2818 unsigned int min_order = first_order;
2819 unsigned first_nonmergeable_int_order = ~0U;
2820 unsigned HOST_WIDE_INT this_end = end;
2821 k = i;
2822 first_nonmergeable_order = ~0U;
2823 for (unsigned int j = i + 1; j < len; ++j)
2824 {
2825 store_immediate_info *info2 = m_store_info[j];
2826 if (info2->bitpos >= this_end)
2827 break;
2828 if (info2->order < try_order)
2829 {
2830 if (info2->rhs_code != INTEGER_CST
2831 || info2->lp_nr != merged_store->lp_nr)
2832 {
2833 /* Normally check_no_overlap makes sure this
2834 doesn't happen, but if end grows below,
2835 then we need to process more stores than
2836 check_no_overlap verified. Example:
2837 MEM[(int *)p_5] = 0;
2838 MEM[(short *)p_5 + 3B] = 1;
2839 MEM[(char *)p_5 + 4B] = _9;
2840 MEM[(char *)p_5 + 2B] = 2; */
2841 k = 0;
2842 break;
2843 }
2844 k = j;
2845 min_order = MIN (min_order, info2->order);
2846 this_end = MAX (this_end,
2847 info2->bitpos + info2->bitsize);
2848 }
2849 else if (info2->rhs_code == INTEGER_CST
2850 && info2->lp_nr == merged_store->lp_nr
2851 && !last_iter)
2852 {
2853 max_order = MAX (max_order, info2->order + 1);
2854 first_nonmergeable_int_order
2855 = MIN (first_nonmergeable_int_order,
2856 info2->order);
2857 }
2858 else
2859 first_nonmergeable_order
2860 = MIN (first_nonmergeable_order, info2->order);
2861 }
2862 if (k > i
2863 && !check_no_overlap (m_store_info, len - 1, true,
2864 min_order, try_order,
2865 merged_store->start, this_end,
2866 first_earlier, end_earlier))
2867 k = 0;
2868 if (k == 0)
2869 {
2870 if (last_order == try_order)
2871 break;
2872 /* If this failed, but only because we grew
2873 try_order, retry with the last working one,
2874 so that we merge at least something. */
2875 try_order = last_order;
2876 last_iter = true;
2877 continue;
2878 }
2879 last_order = try_order;
2880 /* Retry with a larger try_order to see if we could
2881 merge some further INTEGER_CST stores. */
2882 if (max_order
2883 && (first_nonmergeable_int_order
2884 < first_nonmergeable_order))
2885 {
2886 try_order = MIN (max_order,
2887 first_nonmergeable_order);
2888 try_order
2889 = MIN (try_order,
2890 merged_store->first_nonmergeable_order);
2891 if (try_order > last_order && ++attempts < 16)
2892 continue;
2893 }
2894 first_nonmergeable_order
2895 = MIN (first_nonmergeable_order,
2896 first_nonmergeable_int_order);
2897 end = this_end;
2898 break;
2899 }
2900 while (1);
2901
2902 if (k != 0)
2903 {
2904 merged_store->merge_overlapping (info);
2905
2906 merged_store->first_nonmergeable_order
2907 = MIN (merged_store->first_nonmergeable_order,
2908 first_nonmergeable_order);
2909
2910 for (unsigned int j = i + 1; j <= k; j++)
2911 {
2912 store_immediate_info *info2 = m_store_info[j];
2913 gcc_assert (info2->bitpos < end);
2914 if (info2->order < last_order)
2915 {
2916 gcc_assert (info2->rhs_code == INTEGER_CST);
2917 if (info != info2)
2918 merged_store->merge_overlapping (info2);
2919 }
2920 /* Other stores are kept and not merged in any
2921 way. */
2922 }
2923 ignore = k;
2924 goto done;
2925 }
2926 }
2927 }
2928 }
2929 /* |---store 1---||---store 2---|
2930 This store is consecutive to the previous one.
2931 Merge it into the current store group. There can be gaps in between
2932 the stores, but there can't be gaps in between bitregions. */
2933 else if (info->bitregion_start <= merged_store->bitregion_end
2934 && merged_store->can_be_merged_into (info))
2935 {
2936 store_immediate_info *infof = merged_store->stores[0];
2937
2938 /* All the rhs_code ops that take 2 operands are commutative;
2939 swap the operands if it could make them compatible. */
2940 if (infof->ops[0].base_addr
2941 && infof->ops[1].base_addr
2942 && info->ops[0].base_addr
2943 && info->ops[1].base_addr
2944 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2945 info->bitpos - infof->bitpos)
2946 && operand_equal_p (info->ops[1].base_addr,
2947 infof->ops[0].base_addr, 0))
2948 {
2949 std::swap (info->ops[0], info->ops[1]);
2950 info->ops_swapped_p = true;
2951 }
2952 if (check_no_overlap (m_store_info, i, false,
2953 MIN (merged_store->first_order, info->order),
2954 MAX (merged_store->last_order, info->order),
2955 merged_store->start,
2956 MAX (merged_store->start + merged_store->width,
2957 info->bitpos + info->bitsize),
2958 first_earlier, end_earlier))
2959 {
2960 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
2961 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2962 {
2963 info->rhs_code = BIT_INSERT_EXPR;
2964 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2965 info->ops[0].base_addr = NULL_TREE;
2966 }
2967 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2968 {
2969 store_immediate_info *infoj;
2970 unsigned int j;
2971 FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2972 {
2973 infoj->rhs_code = BIT_INSERT_EXPR;
2974 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2975 infoj->ops[0].base_addr = NULL_TREE;
2976 }
2977 }
2978 if ((infof->ops[0].base_addr
2979 ? compatible_load_p (merged_store, info, base_addr, 0)
2980 : !info->ops[0].base_addr)
2981 && (infof->ops[1].base_addr
2982 ? compatible_load_p (merged_store, info, base_addr, 1)
2983 : !info->ops[1].base_addr))
2984 {
2985 merged_store->merge_into (info);
2986 goto done;
2987 }
2988 }
2989 }
2990
2991 /* |---store 1---| <gap> |---store 2---|.
2992 Gap between stores or the rhs not compatible. Start a new group. */
2993
2994 /* Try to apply all the stores recorded for the group to determine
2995 the bitpattern they write and discard it if that fails.
2996 This will also reject single-store groups. */
2997 if (merged_store->apply_stores ())
2998 m_merged_store_groups.safe_push (merged_store);
2999 else
3000 delete merged_store;
3001
3002 merged_store = new merged_store_group (info);
3003 end_earlier = i;
3004 if (dump_file && (dump_flags & TDF_DETAILS))
3005 fputs ("New store group\n", dump_file);
3006
3007 done:
3008 if (dump_file && (dump_flags & TDF_DETAILS))
3009 {
3010 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3011 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3012 i, info->bitsize, info->bitpos);
3013 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3014 fputc ('\n', dump_file);
3015 }
3016 }
3017
3018 /* Record or discard the last store group. */
3019 if (merged_store)
3020 {
3021 if (merged_store->apply_stores ())
3022 m_merged_store_groups.safe_push (merged_store);
3023 else
3024 delete merged_store;
3025 }
3026
3027 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3028
3029 bool success
3030 = !m_merged_store_groups.is_empty ()
3031 && m_merged_store_groups.length () < m_store_info.length ();
3032
3033 if (success && dump_file)
3034 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3035 m_merged_store_groups.length ());
3036
3037 return success;
3038 }
3039
3040 /* Return the type to use for the merged stores or loads described by STMTS.
3041 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3042 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3043 of the MEM_REFs if any. */
3044
3045 static tree
3046 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3047 unsigned short *cliquep, unsigned short *basep)
3048 {
3049 gimple *stmt;
3050 unsigned int i;
3051 tree type = NULL_TREE;
3052 tree ret = NULL_TREE;
3053 *cliquep = 0;
3054 *basep = 0;
3055
3056 FOR_EACH_VEC_ELT (stmts, i, stmt)
3057 {
3058 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3059 : gimple_assign_lhs (stmt);
3060 tree type1 = reference_alias_ptr_type (ref);
3061 tree base = get_base_address (ref);
3062
3063 if (i == 0)
3064 {
3065 if (TREE_CODE (base) == MEM_REF)
3066 {
3067 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3068 *basep = MR_DEPENDENCE_BASE (base);
3069 }
3070 ret = type = type1;
3071 continue;
3072 }
3073 if (!alias_ptr_types_compatible_p (type, type1))
3074 ret = ptr_type_node;
3075 if (TREE_CODE (base) != MEM_REF
3076 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3077 || *basep != MR_DEPENDENCE_BASE (base))
3078 {
3079 *cliquep = 0;
3080 *basep = 0;
3081 }
3082 }
3083 return ret;
3084 }
3085
3086 /* Return the location_t information we can find among the statements
3087 in STMTS. */
3088
3089 static location_t
3090 get_location_for_stmts (vec<gimple *> &stmts)
3091 {
3092 gimple *stmt;
3093 unsigned int i;
3094
3095 FOR_EACH_VEC_ELT (stmts, i, stmt)
3096 if (gimple_has_location (stmt))
3097 return gimple_location (stmt);
3098
3099 return UNKNOWN_LOCATION;
3100 }
3101
3102 /* Used to describe a store resulting from splitting a wide store into
3103 smaller regularly-sized stores in split_group. */
3104
3105 class split_store
3106 {
3107 public:
3108 unsigned HOST_WIDE_INT bytepos;
3109 unsigned HOST_WIDE_INT size;
3110 unsigned HOST_WIDE_INT align;
3111 auto_vec<store_immediate_info *> orig_stores;
3112 /* True if there is a single orig stmt covering the whole split store. */
3113 bool orig;
3114 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3115 unsigned HOST_WIDE_INT);
3116 };
3117
3118 /* Simple constructor. */
3119
3120 split_store::split_store (unsigned HOST_WIDE_INT bp,
3121 unsigned HOST_WIDE_INT sz,
3122 unsigned HOST_WIDE_INT al)
3123 : bytepos (bp), size (sz), align (al), orig (false)
3124 {
3125 orig_stores.create (0);
3126 }
3127
3128 /* Record all stores in GROUP that write to the region starting at BITPOS and
3129 is of size BITSIZE. Record infos for such statements in STORES if
3130 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3131 if there is exactly one original store in the range (in that case ignore
3132 clobber stmts, unless there are only clobber stmts). */
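/* An illustrative sketch (not from the sources): for a group whose stores
   cover bits [0, 32) and [32, 48), querying BITPOS 0 with BITSIZE 32
   returns the first store, whereas BITSIZE 48 returns NULL because two
   non-clobber stores fall inside the range.  */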
3133
3134 static store_immediate_info *
3135 find_constituent_stores (class merged_store_group *group,
3136 vec<store_immediate_info *> *stores,
3137 unsigned int *first,
3138 unsigned HOST_WIDE_INT bitpos,
3139 unsigned HOST_WIDE_INT bitsize)
3140 {
3141 store_immediate_info *info, *ret = NULL;
3142 unsigned int i;
3143 bool second = false;
3144 bool update_first = true;
3145 unsigned HOST_WIDE_INT end = bitpos + bitsize;
3146 for (i = *first; group->stores.iterate (i, &info); ++i)
3147 {
3148 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3149 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3150 if (stmt_end <= bitpos)
3151 {
3152 /* BITPOS passed to this function never decreases from within the
3153 same split_group call, so optimize and don't scan info records
3154 which are known to end before or at BITPOS next time.
3155 Only do it if all stores before this one also pass this. */
3156 if (update_first)
3157 *first = i + 1;
3158 continue;
3159 }
3160 else
3161 update_first = false;
3162
3163 /* The stores in GROUP are ordered by bitposition so if we're past
3164 the region for this group return early. */
3165 if (stmt_start >= end)
3166 return ret;
3167
3168 if (gimple_clobber_p (info->stmt))
3169 {
3170 if (stores)
3171 stores->safe_push (info);
3172 if (ret == NULL)
3173 ret = info;
3174 continue;
3175 }
3176 if (stores)
3177 {
3178 stores->safe_push (info);
3179 if (ret && !gimple_clobber_p (ret->stmt))
3180 {
3181 ret = NULL;
3182 second = true;
3183 }
3184 }
3185 else if (ret && !gimple_clobber_p (ret->stmt))
3186 return NULL;
3187 if (!second)
3188 ret = info;
3189 }
3190 return ret;
3191 }
3192
3193 /* Return how many SSA_NAMEs used to compute the value stored by INFO
3194 have multiple uses. If any SSA_NAME has multiple uses, also
3195 count statements needed to compute it. */
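/* For instance (a hypothetical snippet): for t.a = _2; with
   _2 = _1 ^ 15; and _1 = s.a;, if _2 has a single use but _1 is also
   used elsewhere, the BIT_XOR_EXPR case below returns 1.  */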
3196
3197 static unsigned
3198 count_multiple_uses (store_immediate_info *info)
3199 {
3200 gimple *stmt = info->stmt;
3201 unsigned ret = 0;
3202 switch (info->rhs_code)
3203 {
3204 case INTEGER_CST:
3205 return 0;
3206 case BIT_AND_EXPR:
3207 case BIT_IOR_EXPR:
3208 case BIT_XOR_EXPR:
3209 if (info->bit_not_p)
3210 {
3211 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3212 ret = 1; /* Fall through below to return
3213 the BIT_NOT_EXPR stmt and then
3214 BIT_{AND,IOR,XOR}_EXPR and anything it
3215 uses. */
3216 else
3217 /* After this, stmt is the BIT_NOT_EXPR. */
3218 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3219 }
3220 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3221 {
3222 ret += 1 + info->ops[0].bit_not_p;
3223 if (info->ops[1].base_addr)
3224 ret += 1 + info->ops[1].bit_not_p;
3225 return ret + 1;
3226 }
3227 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3228 /* stmt is now the BIT_*_EXPR. */
3229 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3230 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3231 else if (info->ops[info->ops_swapped_p].bit_not_p)
3232 {
3233 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3234 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3235 ++ret;
3236 }
3237 if (info->ops[1].base_addr == NULL_TREE)
3238 {
3239 gcc_checking_assert (!info->ops_swapped_p);
3240 return ret;
3241 }
3242 if (!has_single_use (gimple_assign_rhs2 (stmt)))
3243 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3244 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3245 {
3246 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3247 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3248 ++ret;
3249 }
3250 return ret;
3251 case MEM_REF:
3252 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3253 return 1 + info->ops[0].bit_not_p;
3254 else if (info->ops[0].bit_not_p)
3255 {
3256 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3257 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3258 return 1;
3259 }
3260 return 0;
3261 case BIT_INSERT_EXPR:
3262 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3263 default:
3264 gcc_unreachable ();
3265 }
3266 }
3267
3268 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3269 vector (if non-NULL) with split_store structs describing the byte offset
3270 (from the base), the bit size and alignment of each store as well as the
3271 original statements involved in each such split group.
3272 This is to separate the splitting strategy from the statement
3273 building/emission/linking done in output_merged_store.
3274 Return number of new stores.
3275 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3276 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3277 BZERO_FIRST may be true only when the first store covers the whole group
3278 and clears it; if BZERO_FIRST is true, keep that first store in the set
3279 unmodified and emit further stores for the overrides only.
3280 If SPLIT_STORES is NULL, it is just a dry run to count number of
3281 new stores. */
3282
3283 static unsigned int
3284 split_group (merged_store_group *group, bool allow_unaligned_store,
3285 bool allow_unaligned_load, bool bzero_first,
3286 vec<split_store *> *split_stores,
3287 unsigned *total_orig,
3288 unsigned *total_new)
3289 {
3290 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3291 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3292 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3293 unsigned HOST_WIDE_INT group_align = group->align;
3294 unsigned HOST_WIDE_INT align_base = group->align_base;
3295 unsigned HOST_WIDE_INT group_load_align = group_align;
3296 bool any_orig = false;
3297
3298 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3299
3300 if (group->stores[0]->rhs_code == LROTATE_EXPR
3301 || group->stores[0]->rhs_code == NOP_EXPR)
3302 {
3303 gcc_assert (!bzero_first);
3304 /* For bswap framework using sets of stores, all the checking
3305 has been done earlier in try_coalesce_bswap and needs to be
3306 emitted as a single store. */
3307 if (total_orig)
3308 {
3309 /* Avoid the old/new stmt count heuristics. It should be
3310 always beneficial. */
3311 total_new[0] = 1;
3312 total_orig[0] = 2;
3313 }
3314
3315 if (split_stores)
3316 {
3317 unsigned HOST_WIDE_INT align_bitpos
3318 = (group->start - align_base) & (group_align - 1);
3319 unsigned HOST_WIDE_INT align = group_align;
3320 if (align_bitpos)
3321 align = least_bit_hwi (align_bitpos);
3322 bytepos = group->start / BITS_PER_UNIT;
3323 split_store *store
3324 = new split_store (bytepos, group->width, align);
3325 unsigned int first = 0;
3326 find_constituent_stores (group, &store->orig_stores,
3327 &first, group->start, group->width);
3328 split_stores->safe_push (store);
3329 }
3330
3331 return 1;
3332 }
3333
3334 unsigned int ret = 0, first = 0;
3335 unsigned HOST_WIDE_INT try_pos = bytepos;
3336
3337 if (total_orig)
3338 {
3339 unsigned int i;
3340 store_immediate_info *info = group->stores[0];
3341
3342 total_new[0] = 0;
3343 total_orig[0] = 1; /* The orig store. */
3344 info = group->stores[0];
3345 if (info->ops[0].base_addr)
3346 total_orig[0]++;
3347 if (info->ops[1].base_addr)
3348 total_orig[0]++;
3349 switch (info->rhs_code)
3350 {
3351 case BIT_AND_EXPR:
3352 case BIT_IOR_EXPR:
3353 case BIT_XOR_EXPR:
3354 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3355 break;
3356 default:
3357 break;
3358 }
3359 total_orig[0] *= group->stores.length ();
3360
3361 FOR_EACH_VEC_ELT (group->stores, i, info)
3362 {
3363 total_new[0] += count_multiple_uses (info);
3364 total_orig[0] += (info->bit_not_p
3365 + info->ops[0].bit_not_p
3366 + info->ops[1].bit_not_p);
3367 }
3368 }
3369
3370 if (!allow_unaligned_load)
3371 for (int i = 0; i < 2; ++i)
3372 if (group->load_align[i])
3373 group_load_align = MIN (group_load_align, group->load_align[i]);
3374
3375 if (bzero_first)
3376 {
3377 store_immediate_info *gstore;
3378 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3379 if (!gimple_clobber_p (gstore->stmt))
3380 break;
3381 ++first;
3382 ret = 1;
3383 if (split_stores)
3384 {
3385 split_store *store
3386 = new split_store (bytepos, gstore->bitsize, align_base);
3387 store->orig_stores.safe_push (gstore);
3388 store->orig = true;
3389 any_orig = true;
3390 split_stores->safe_push (store);
3391 }
3392 }
3393
3394 while (size > 0)
3395 {
3396 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3397 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3398 || (bzero_first && group->val[try_pos - bytepos] == 0)))
3399 {
3400 /* Skip padding bytes. */
3401 ++try_pos;
3402 size -= BITS_PER_UNIT;
3403 continue;
3404 }
3405
3406 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3407 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3408 unsigned HOST_WIDE_INT align_bitpos
3409 = (try_bitpos - align_base) & (group_align - 1);
3410 unsigned HOST_WIDE_INT align = group_align;
3411 bool found_orig = false;
3412 if (align_bitpos)
3413 align = least_bit_hwi (align_bitpos);
3414 if (!allow_unaligned_store)
3415 try_size = MIN (try_size, align);
3416 if (!allow_unaligned_load)
3417 {
3418 /* If we can't do or don't want to do unaligned stores
3419 as well as loads, we need to take the loads into account
3420 as well. */
3421 unsigned HOST_WIDE_INT load_align = group_load_align;
3422 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3423 if (align_bitpos)
3424 load_align = least_bit_hwi (align_bitpos);
3425 for (int i = 0; i < 2; ++i)
3426 if (group->load_align[i])
3427 {
3428 align_bitpos
3429 = known_alignment (try_bitpos
3430 - group->stores[0]->bitpos
3431 + group->stores[0]->ops[i].bitpos
3432 - group->load_align_base[i]);
3433 if (align_bitpos & (group_load_align - 1))
3434 {
3435 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3436 load_align = MIN (load_align, a);
3437 }
3438 }
3439 try_size = MIN (try_size, load_align);
3440 }
3441 store_immediate_info *info
3442 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3443 if (info && !gimple_clobber_p (info->stmt))
3444 {
3445 /* If there is just one original statement for the range, see if
3446 we can just reuse the original store which could be even larger
3447 than try_size. */
3448 unsigned HOST_WIDE_INT stmt_end
3449 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3450 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3451 stmt_end - try_bitpos);
3452 if (info && info->bitpos >= try_bitpos)
3453 {
3454 store_immediate_info *info2 = NULL;
3455 unsigned int first_copy = first;
3456 if (info->bitpos > try_bitpos
3457 && stmt_end - try_bitpos <= try_size)
3458 {
3459 info2 = find_constituent_stores (group, NULL, &first_copy,
3460 try_bitpos,
3461 info->bitpos - try_bitpos);
3462 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3463 }
3464 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3465 {
3466 info2 = find_constituent_stores (group, NULL, &first_copy,
3467 stmt_end,
3468 (try_bitpos + try_size)
3469 - stmt_end);
3470 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3471 }
3472 if (info2 == NULL)
3473 {
3474 try_size = stmt_end - try_bitpos;
3475 found_orig = true;
3476 goto found;
3477 }
3478 }
3479 }
3480
3481 /* Approximate store bitsize for the case when there are no padding
3482 bits. */
3483 while (try_size > size)
3484 try_size /= 2;
3485 /* Now look for whole padding bytes at the end of that bitsize. */
3486 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3487 if (group->mask[try_pos - bytepos + nonmasked - 1]
3488 != (unsigned char) ~0U
3489 && (!bzero_first
3490 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
3491 break;
3492 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3493 {
3494 /* If entire try_size range is padding, skip it. */
3495 try_pos += try_size / BITS_PER_UNIT;
3496 size -= try_size;
3497 continue;
3498 }
3499 /* Otherwise try to decrease try_size if second half, last 3 quarters
3500 etc. are padding. */
3501 nonmasked *= BITS_PER_UNIT;
3502 while (nonmasked <= try_size / 2)
3503 try_size /= 2;
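/* For instance, with a 32-bit try_size where only the first byte is
   not padding (nonmasked == 8 bits), the loop above halves try_size
   down to 8 bits.  */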
3504 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3505 {
3506 /* Now look for whole padding bytes at the start of that bitsize. */
3507 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3508 for (masked = 0; masked < try_bytesize; ++masked)
3509 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3510 && (!bzero_first
3511 || group->val[try_pos - bytepos + masked] != 0))
3512 break;
3513 masked *= BITS_PER_UNIT;
3514 gcc_assert (masked < try_size);
3515 if (masked >= try_size / 2)
3516 {
3517 while (masked >= try_size / 2)
3518 {
3519 try_size /= 2;
3520 try_pos += try_size / BITS_PER_UNIT;
3521 size -= try_size;
3522 masked -= try_size;
3523 }
3524 /* Need to recompute the alignment, so just retry at the new
3525 position. */
3526 continue;
3527 }
3528 }
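/* For instance, if the first two bytes of a 32-bit try_size are padding
   (masked == 16 bits), the loop above halves try_size to 16 bits and
   advances try_pos past the two padding bytes before retrying, so that
   the alignment is recomputed for the new position.  */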
3529
3530 found:
3531 ++ret;
3532
3533 if (split_stores)
3534 {
3535 split_store *store
3536 = new split_store (try_pos, try_size, align);
3537 info = find_constituent_stores (group, &store->orig_stores,
3538 &first, try_bitpos, try_size);
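/* Reuse the original statement (store->orig) when a single non-clobber
   constituent store lies entirely within the split range and either it
   is the only constituent, the range was derived from it above
   (found_orig), or it covers the range exactly.  */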
3539 if (info
3540 && !gimple_clobber_p (info->stmt)
3541 && info->bitpos >= try_bitpos
3542 && info->bitpos + info->bitsize <= try_bitpos + try_size
3543 && (store->orig_stores.length () == 1
3544 || found_orig
3545 || (info->bitpos == try_bitpos
3546 && (info->bitpos + info->bitsize
3547 == try_bitpos + try_size))))
3548 {
3549 store->orig = true;
3550 any_orig = true;
3551 }
3552 split_stores->safe_push (store);
3553 }
3554
3555 try_pos += try_size / BITS_PER_UNIT;
3556 size -= try_size;
3557 }
3558
3559 if (total_orig)
3560 {
3561 unsigned int i;
3562 split_store *store;
3563 /* If we are reusing some original stores and any of the
3564 original SSA_NAMEs had multiple uses, we need to subtract
3565 those now before we add the new ones. */
3566 if (total_new[0] && any_orig)
3567 {
3568 FOR_EACH_VEC_ELT (*split_stores, i, store)
3569 if (store->orig)
3570 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3571 }
3572 total_new[0] += ret; /* The new store. */
3573 store_immediate_info *info = group->stores[0];
3574 if (info->ops[0].base_addr)
3575 total_new[0] += ret;
3576 if (info->ops[1].base_addr)
3577 total_new[0] += ret;
3578 switch (info->rhs_code)
3579 {
3580 case BIT_AND_EXPR:
3581 case BIT_IOR_EXPR:
3582 case BIT_XOR_EXPR:
3583 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3584 break;
3585 default:
3586 break;
3587 }
3588 FOR_EACH_VEC_ELT (*split_stores, i, store)
3589 {
3590 unsigned int j;
3591 bool bit_not_p[3] = { false, false, false };
3592 /* If all orig_stores have certain bit_not_p set, then
3593 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3594 If some orig_stores have certain bit_not_p set, then
3595 we'd use a BIT_XOR_EXPR with a mask and need to account for
3596 it. */
3597 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3598 {
3599 if (info->ops[0].bit_not_p)
3600 bit_not_p[0] = true;
3601 if (info->ops[1].bit_not_p)
3602 bit_not_p[1] = true;
3603 if (info->bit_not_p)
3604 bit_not_p[2] = true;
3605 }
3606 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3607 }
3608
3609 }
3610
3611 return ret;
3612 }
3613
3614 /* Return the operation through which the operand IDX (if < 2) or
3615 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3616 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3617 the bits should be xored with mask. */
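/* BIT_XOR_EXPR is used when only some of the constituent stores have
   bit_not_p set (or the inverted values have padding bits): the returned
   MASK then has all-ones exactly over the bits of those stores.  */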
3618
3619 static enum tree_code
3620 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3621 {
3622 unsigned int i;
3623 store_immediate_info *info;
3624 unsigned int cnt = 0;
3625 bool any_paddings = false;
3626 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3627 {
3628 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3629 if (bit_not_p)
3630 {
3631 ++cnt;
3632 tree lhs = gimple_assign_lhs (info->stmt);
3633 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3634 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3635 any_paddings = true;
3636 }
3637 }
3638 mask = NULL_TREE;
3639 if (cnt == 0)
3640 return NOP_EXPR;
3641 if (cnt == split_store->orig_stores.length () && !any_paddings)
3642 return BIT_NOT_EXPR;
3643
3644 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3645 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3646 unsigned char *buf
3647 = XALLOCAVEC (unsigned char, buf_size);
3648 memset (buf, ~0U, buf_size);
3649 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3650 {
3651 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3652 if (!bit_not_p)
3653 continue;
3654 /* Clear regions with bit_not_p and invert afterwards, rather than
3655 clear regions with !bit_not_p, so that gaps in between stores aren't
3656 set in the mask. */
3657 unsigned HOST_WIDE_INT bitsize = info->bitsize;
3658 unsigned HOST_WIDE_INT prec = bitsize;
3659 unsigned int pos_in_buffer = 0;
3660 if (any_paddings)
3661 {
3662 tree lhs = gimple_assign_lhs (info->stmt);
3663 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3664 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3665 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3666 }
3667 if (info->bitpos < try_bitpos)
3668 {
3669 gcc_assert (info->bitpos + bitsize > try_bitpos);
3670 if (!BYTES_BIG_ENDIAN)
3671 {
3672 if (prec <= try_bitpos - info->bitpos)
3673 continue;
3674 prec -= try_bitpos - info->bitpos;
3675 }
3676 bitsize -= try_bitpos - info->bitpos;
3677 if (BYTES_BIG_ENDIAN && prec > bitsize)
3678 prec = bitsize;
3679 }
3680 else
3681 pos_in_buffer = info->bitpos - try_bitpos;
3682 if (prec < bitsize)
3683 {
3684 /* If this is a bool inversion, invert just the least significant
3685 prec bits rather than all bits of it. */
3686 if (BYTES_BIG_ENDIAN)
3687 {
3688 pos_in_buffer += bitsize - prec;
3689 if (pos_in_buffer >= split_store->size)
3690 continue;
3691 }
3692 bitsize = prec;
3693 }
3694 if (pos_in_buffer + bitsize > split_store->size)
3695 bitsize = split_store->size - pos_in_buffer;
3696 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3697 if (BYTES_BIG_ENDIAN)
3698 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3699 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3700 else
3701 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3702 }
3703 for (unsigned int i = 0; i < buf_size; ++i)
3704 buf[i] = ~buf[i];
3705 mask = native_interpret_expr (int_type, buf, buf_size);
3706 return BIT_XOR_EXPR;
3707 }
3708
3709 /* Given a merged store group GROUP output the widened version of it.
3710 The store chain is against the base object BASE.
3711 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3712 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3713 Make sure that the number of statements output is less than the number of
3714 original statements. If a better sequence is possible emit it and
3715 return true. */
3716
3717 bool
3718 imm_store_chain_info::output_merged_store (merged_store_group *group)
3719 {
3720 split_store *split_store;
3721 unsigned int i;
3722 unsigned HOST_WIDE_INT start_byte_pos
3723 = group->bitregion_start / BITS_PER_UNIT;
3724
3725 unsigned int orig_num_stmts = group->stores.length ();
3726 if (orig_num_stmts < 2)
3727 return false;
3728
3729 auto_vec<class split_store *, 32> split_stores;
3730 bool allow_unaligned_store
3731 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
3732 bool allow_unaligned_load = allow_unaligned_store;
3733 bool bzero_first = false;
3734 store_immediate_info *store;
3735 unsigned int num_clobber_stmts = 0;
3736 if (group->stores[0]->rhs_code == INTEGER_CST)
3737 {
3738 FOR_EACH_VEC_ELT (group->stores, i, store)
3739 if (gimple_clobber_p (store->stmt))
3740 num_clobber_stmts++;
3741 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
3742 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
3743 && group->start == store->bitpos
3744 && group->width == store->bitsize
3745 && (group->start % BITS_PER_UNIT) == 0
3746 && (group->width % BITS_PER_UNIT) == 0)
3747 {
3748 bzero_first = true;
3749 break;
3750 }
3751 else
3752 break;
3753 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
3754 if (gimple_clobber_p (store->stmt))
3755 num_clobber_stmts++;
3756 if (num_clobber_stmts == orig_num_stmts)
3757 return false;
3758 orig_num_stmts -= num_clobber_stmts;
3759 }
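/* An illustrative (source-level) shape of the bzero_first case detected
   above, assuming a struct type S with members a and b:
     struct S s = {};   // empty CONSTRUCTOR clearing the whole object
     s.a = 1;           // narrower stores overriding parts of it
     s.b = 2;  */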
3760 if (allow_unaligned_store || bzero_first)
3761 {
3762 /* If unaligned stores are allowed, see how many stores we'd emit
3763 for unaligned and how many stores we'd emit for aligned stores.
3764 Only use unaligned stores if they result in fewer stores than aligned ones.
3765 Similarly, if the whole region is cleared first, prefer expanding
3766 that clear together with the subsequent stores rather than expanding
3767 the clear first and then the remaining merged stores. */
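/* The pass number encodes the variant: bit 0 selects unaligned stores,
   bit 1 selects the bzero-first handling.  */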
3768 unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
3769 int pass_min = 0;
3770 for (int pass = 0; pass < 4; ++pass)
3771 {
3772 if (!allow_unaligned_store && (pass & 1) != 0)
3773 continue;
3774 if (!bzero_first && (pass & 2) != 0)
3775 continue;
3776 cnt[pass] = split_group (group, (pass & 1) != 0,
3777 allow_unaligned_load, (pass & 2) != 0,
3778 NULL, NULL, NULL);
3779 if (cnt[pass] < cnt[pass_min])
3780 pass_min = pass;
3781 }
3782 if ((pass_min & 1) == 0)
3783 allow_unaligned_store = false;
3784 if ((pass_min & 2) == 0)
3785 bzero_first = false;
3786 }
3787 unsigned total_orig, total_new;
3788 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
3789 &split_stores, &total_orig, &total_new);
3790
3791 /* Determine if there is a clobber covering the whole group at the start,
3792 followed by proposed split stores that cover the whole group. In that
3793 case, prefer the transformation even if
3794 split_stores.length () == orig_num_stmts. */
3795 bool clobber_first = false;
3796 if (num_clobber_stmts
3797 && gimple_clobber_p (group->stores[0]->stmt)
3798 && group->start == group->stores[0]->bitpos
3799 && group->width == group->stores[0]->bitsize
3800 && (group->start % BITS_PER_UNIT) == 0
3801 && (group->width % BITS_PER_UNIT) == 0)
3802 {
3803 clobber_first = true;
3804 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
3805 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3806 if (split_store->bytepos != pos)
3807 {
3808 clobber_first = false;
3809 break;
3810 }
3811 else
3812 pos += split_store->size / BITS_PER_UNIT;
3813 if (pos != (group->start + group->width) / BITS_PER_UNIT)
3814 clobber_first = false;
3815 }
3816
3817 if (split_stores.length () >= orig_num_stmts + clobber_first)
3818 {
3819
3820 /* We didn't manage to reduce the number of statements. Bail out. */
3821 if (dump_file && (dump_flags & TDF_DETAILS))
3822 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3823 " Not profitable to emit new sequence.\n",
3824 orig_num_stmts);
3825 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3826 delete split_store;
3827 return false;
3828 }
3829 if (total_orig <= total_new)
3830 {
3831 /* If the estimated number of new statements is not below the
3832 estimated number of original statements, bail out too. */
3833 if (dump_file && (dump_flags & TDF_DETAILS))
3834 fprintf (dump_file, "Estimated number of original stmts (%u)"
3835 " not larger than estimated number of new"
3836 " stmts (%u).\n",
3837 total_orig, total_new);
3838 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3839 delete split_store;
3840 return false;
3841 }
3842 if (group->stores[0]->rhs_code == INTEGER_CST)
3843 {
3844 bool all_orig = true;
3845 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3846 if (!split_store->orig)
3847 {
3848 all_orig = false;
3849 break;
3850 }
3851 if (all_orig)
3852 {
3853 unsigned int cnt = split_stores.length ();
3854 store_immediate_info *store;
3855 FOR_EACH_VEC_ELT (group->stores, i, store)
3856 if (gimple_clobber_p (store->stmt))
3857 ++cnt;
3858 /* Punt if we wouldn't make any real changes, i.e. keep all
3859 orig stmts + all clobbers. */
3860 if (cnt == group->stores.length ())
3861 {
3862 if (dump_file && (dump_flags & TDF_DETAILS))
3863 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3864 " Not profitable to emit new sequence.\n",
3865 orig_num_stmts);
3866 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3867 delete split_store;
3868 return false;
3869 }
3870 }
3871 }
3872
3873 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3874 gimple_seq seq = NULL;
3875 tree last_vdef, new_vuse;
3876 last_vdef = gimple_vdef (group->last_stmt);
3877 new_vuse = gimple_vuse (group->last_stmt);
3878 tree bswap_res = NULL_TREE;
3879
3880 /* Clobbers are not removed. */
3881 if (gimple_clobber_p (group->last_stmt))
3882 {
3883 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
3884 gimple_set_vdef (group->last_stmt, new_vuse);
3885 }
3886
3887 if (group->stores[0]->rhs_code == LROTATE_EXPR
3888 || group->stores[0]->rhs_code == NOP_EXPR)
3889 {
3890 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3891 gimple *ins_stmt = group->stores[0]->ins_stmt;
3892 struct symbolic_number *n = &group->stores[0]->n;
3893 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3894
3895 switch (n->range)
3896 {
3897 case 16:
3898 load_type = bswap_type = uint16_type_node;
3899 break;
3900 case 32:
3901 load_type = uint32_type_node;
3902 if (bswap)
3903 {
3904 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3905 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3906 }
3907 break;
3908 case 64:
3909 load_type = uint64_type_node;
3910 if (bswap)
3911 {
3912 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3913 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3914 }
3915 break;
3916 default:
3917 gcc_unreachable ();
3918 }
3919
3920 /* If the loads each have the vuse of the corresponding store,
3921 we've already checked the aliasing in try_coalesce_bswap and
3922 we want to sink the needed load into seq. So we need to use
3923 new_vuse on the load. */
3924 if (n->base_addr)
3925 {
3926 if (n->vuse == NULL)
3927 {
3928 n->vuse = new_vuse;
3929 ins_stmt = NULL;
3930 }
3931 else
3932 /* Update the vuse in case it has been changed by output_merged_stores. */
3933 n->vuse = gimple_vuse (ins_stmt);
3934 }
3935 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3936 bswap_type, load_type, n, bswap);
3937 gcc_assert (bswap_res);
3938 }
3939
3940 gimple *stmt = NULL;
3941 auto_vec<gimple *, 32> orig_stmts;
3942 gimple_seq this_seq;
3943 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
3944 is_gimple_mem_ref_addr, NULL_TREE);
3945 gimple_seq_add_seq_without_update (&seq, this_seq);
3946
3947 tree load_addr[2] = { NULL_TREE, NULL_TREE };
3948 gimple_seq load_seq[2] = { NULL, NULL };
3949 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3950 for (int j = 0; j < 2; ++j)
3951 {
3952 store_operand_info &op = group->stores[0]->ops[j];
3953 if (op.base_addr == NULL_TREE)
3954 continue;
3955
3956 store_immediate_info *infol = group->stores.last ();
3957 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3958 {
3959 /* We can't pick the location randomly; while we've verified
3960 all the loads have the same vuse, they can still be in different
3961 basic blocks and we need to pick the one from the last bb:
3962 int x = q[0];
3963 if (x == N) return;
3964 int y = q[1];
3965 p[0] = x;
3966 p[1] = y;
3967 otherwise if we put the wider load at the q[0] load, we might
3968 segfault if q[1] is not mapped. */
3969 basic_block bb = gimple_bb (op.stmt);
3970 gimple *ostmt = op.stmt;
3971 store_immediate_info *info;
3972 FOR_EACH_VEC_ELT (group->stores, i, info)
3973 {
3974 gimple *tstmt = info->ops[j].stmt;
3975 basic_block tbb = gimple_bb (tstmt);
3976 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3977 {
3978 ostmt = tstmt;
3979 bb = tbb;
3980 }
3981 }
3982 load_gsi[j] = gsi_for_stmt (ostmt);
3983 load_addr[j]
3984 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3985 &load_seq[j], is_gimple_mem_ref_addr,
3986 NULL_TREE);
3987 }
3988 else if (operand_equal_p (base_addr, op.base_addr, 0))
3989 load_addr[j] = addr;
3990 else
3991 {
3992 load_addr[j]
3993 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3994 &this_seq, is_gimple_mem_ref_addr,
3995 NULL_TREE);
3996 gimple_seq_add_seq_without_update (&seq, this_seq);
3997 }
3998 }
3999
4000 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4001 {
4002 unsigned HOST_WIDE_INT try_size = split_store->size;
4003 unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4004 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4005 unsigned HOST_WIDE_INT align = split_store->align;
4006 tree dest, src;
4007 location_t loc;
4008 if (split_store->orig)
4009 {
4010 /* If there is just a single non-clobber constituent store
4011 which covers the whole area, just reuse the lhs and rhs. */
4012 gimple *orig_stmt = NULL;
4013 store_immediate_info *store;
4014 unsigned int j;
4015 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4016 if (!gimple_clobber_p (store->stmt))
4017 {
4018 orig_stmt = store->stmt;
4019 break;
4020 }
4021 dest = gimple_assign_lhs (orig_stmt);
4022 src = gimple_assign_rhs1 (orig_stmt);
4023 loc = gimple_location (orig_stmt);
4024 }
4025 else
4026 {
4027 store_immediate_info *info;
4028 unsigned short clique, base;
4029 unsigned int k;
4030 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4031 orig_stmts.safe_push (info->stmt);
4032 tree offset_type
4033 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4034 loc = get_location_for_stmts (orig_stmts);
4035 orig_stmts.truncate (0);
4036
4037 tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4038 int_type = build_aligned_type (int_type, align);
4039 dest = fold_build2 (MEM_REF, int_type, addr,
4040 build_int_cst (offset_type, try_pos));
4041 if (TREE_CODE (dest) == MEM_REF)
4042 {
4043 MR_DEPENDENCE_CLIQUE (dest) = clique;
4044 MR_DEPENDENCE_BASE (dest) = base;
4045 }
4046
4047 tree mask;
4048 if (bswap_res)
4049 mask = integer_zero_node;
4050 else
4051 mask = native_interpret_expr (int_type,
4052 group->mask + try_pos
4053 - start_byte_pos,
4054 group->buf_size);
4055
4056 tree ops[2];
4057 for (int j = 0;
4058 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4059 ++j)
4060 {
4061 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4062 if (bswap_res)
4063 ops[j] = bswap_res;
4064 else if (op.base_addr)
4065 {
4066 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4067 orig_stmts.safe_push (info->ops[j].stmt);
4068
4069 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4070 &clique, &base);
4071 location_t load_loc = get_location_for_stmts (orig_stmts);
4072 orig_stmts.truncate (0);
4073
4074 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4075 unsigned HOST_WIDE_INT align_bitpos
4076 = known_alignment (try_bitpos
4077 - split_store->orig_stores[0]->bitpos
4078 + op.bitpos);
4079 if (align_bitpos & (load_align - 1))
4080 load_align = least_bit_hwi (align_bitpos);
4081
4082 tree load_int_type
4083 = build_nonstandard_integer_type (try_size, UNSIGNED);
4084 load_int_type
4085 = build_aligned_type (load_int_type, load_align);
4086
4087 poly_uint64 load_pos
4088 = exact_div (try_bitpos
4089 - split_store->orig_stores[0]->bitpos
4090 + op.bitpos,
4091 BITS_PER_UNIT);
4092 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4093 build_int_cst (offset_type, load_pos));
4094 if (TREE_CODE (ops[j]) == MEM_REF)
4095 {
4096 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4097 MR_DEPENDENCE_BASE (ops[j]) = base;
4098 }
4099 if (!integer_zerop (mask))
4100 /* The load might load some bits (that will be masked off
4101 later on) uninitialized, avoid -W*uninitialized
4102 warnings in that case. */
4103 TREE_NO_WARNING (ops[j]) = 1;
4104
4105 stmt = gimple_build_assign (make_ssa_name (int_type),
4106 ops[j]);
4107 gimple_set_location (stmt, load_loc);
4108 if (gsi_bb (load_gsi[j]))
4109 {
4110 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4111 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4112 }
4113 else
4114 {
4115 gimple_set_vuse (stmt, new_vuse);
4116 gimple_seq_add_stmt_without_update (&seq, stmt);
4117 }
4118 ops[j] = gimple_assign_lhs (stmt);
4119 tree xor_mask;
4120 enum tree_code inv_op
4121 = invert_op (split_store, j, int_type, xor_mask);
4122 if (inv_op != NOP_EXPR)
4123 {
4124 stmt = gimple_build_assign (make_ssa_name (int_type),
4125 inv_op, ops[j], xor_mask);
4126 gimple_set_location (stmt, load_loc);
4127 ops[j] = gimple_assign_lhs (stmt);
4128
4129 if (gsi_bb (load_gsi[j]))
4130 gimple_seq_add_stmt_without_update (&load_seq[j],
4131 stmt);
4132 else
4133 gimple_seq_add_stmt_without_update (&seq, stmt);
4134 }
4135 }
4136 else
4137 ops[j] = native_interpret_expr (int_type,
4138 group->val + try_pos
4139 - start_byte_pos,
4140 group->buf_size);
4141 }
4142
4143 switch (split_store->orig_stores[0]->rhs_code)
4144 {
4145 case BIT_AND_EXPR:
4146 case BIT_IOR_EXPR:
4147 case BIT_XOR_EXPR:
4148 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4149 {
4150 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4151 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4152 }
4153 location_t bit_loc;
4154 bit_loc = get_location_for_stmts (orig_stmts);
4155 orig_stmts.truncate (0);
4156
4157 stmt
4158 = gimple_build_assign (make_ssa_name (int_type),
4159 split_store->orig_stores[0]->rhs_code,
4160 ops[0], ops[1]);
4161 gimple_set_location (stmt, bit_loc);
4162 /* If there is just one load and there is a separate
4163 load_seq[0], emit the bitwise op right after it. */
4164 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4165 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4166 /* Otherwise, if at least one load is in seq, we need to
4167 emit the bitwise op right before the store. If there
4168 are two loads and they are emitted somewhere else, it would
4169 be better to emit the bitwise op as early as possible;
4170 we don't track where that would be possible right now
4171 though. */
4172 else
4173 gimple_seq_add_stmt_without_update (&seq, stmt);
4174 src = gimple_assign_lhs (stmt);
4175 tree xor_mask;
4176 enum tree_code inv_op;
4177 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4178 if (inv_op != NOP_EXPR)
4179 {
4180 stmt = gimple_build_assign (make_ssa_name (int_type),
4181 inv_op, src, xor_mask);
4182 gimple_set_location (stmt, bit_loc);
4183 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4184 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4185 else
4186 gimple_seq_add_stmt_without_update (&seq, stmt);
4187 src = gimple_assign_lhs (stmt);
4188 }
4189 break;
4190 case LROTATE_EXPR:
4191 case NOP_EXPR:
4192 src = ops[0];
4193 if (!is_gimple_val (src))
4194 {
4195 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4196 src);
4197 gimple_seq_add_stmt_without_update (&seq, stmt);
4198 src = gimple_assign_lhs (stmt);
4199 }
4200 if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
4201 {
4202 stmt = gimple_build_assign (make_ssa_name (int_type),
4203 NOP_EXPR, src);
4204 gimple_seq_add_stmt_without_update (&seq, stmt);
4205 src = gimple_assign_lhs (stmt);
4206 }
4207 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4208 if (inv_op != NOP_EXPR)
4209 {
4210 stmt = gimple_build_assign (make_ssa_name (int_type),
4211 inv_op, src, xor_mask);
4212 gimple_set_location (stmt, loc);
4213 gimple_seq_add_stmt_without_update (&seq, stmt);
4214 src = gimple_assign_lhs (stmt);
4215 }
4216 break;
4217 default:
4218 src = ops[0];
4219 break;
4220 }
4221
4222 /* If bit insertion is required, we use the source as an accumulator
4223 into which the successive bit-field values are manually inserted.
4224 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
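/* For instance (little-endian), inserting a 3-bit field at bit offset 5
   of a 16-bit split store: start_gap == 5, end_gap == 8, so the value is
   masked with 0x7 if its type is wider than 3 bits, converted to the
   16-bit type, shifted left by 5 and IORed into the accumulator.  */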
4225 if (group->bit_insertion)
4226 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4227 if (info->rhs_code == BIT_INSERT_EXPR
4228 && info->bitpos < try_bitpos + try_size
4229 && info->bitpos + info->bitsize > try_bitpos)
4230 {
4231 /* Mask, truncate, convert to final type, shift and ior into
4232 the accumulator. Note that every step can be a no-op. */
4233 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4234 const HOST_WIDE_INT end_gap
4235 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4236 tree tem = info->ops[0].val;
4237 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4238 {
4239 tree bitfield_type
4240 = build_nonstandard_integer_type (info->bitsize,
4241 UNSIGNED);
4242 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4243 }
4244 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4245 {
4246 const unsigned HOST_WIDE_INT imask
4247 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4248 tem = gimple_build (&seq, loc,
4249 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4250 build_int_cst (TREE_TYPE (tem),
4251 imask));
4252 }
4253 const HOST_WIDE_INT shift
4254 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4255 if (shift < 0)
4256 tem = gimple_build (&seq, loc,
4257 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4258 build_int_cst (NULL_TREE, -shift));
4259 tem = gimple_convert (&seq, loc, int_type, tem);
4260 if (shift > 0)
4261 tem = gimple_build (&seq, loc,
4262 LSHIFT_EXPR, int_type, tem,
4263 build_int_cst (NULL_TREE, shift));
4264 src = gimple_build (&seq, loc,
4265 BIT_IOR_EXPR, int_type, tem, src);
4266 }
4267
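/* Read-modify-write: the value finally stored is
   (old_value & mask) | (src & ~mask), where MASK has ones only in the
   bits not written by any store of the group.  */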
4268 if (!integer_zerop (mask))
4269 {
4270 tree tem = make_ssa_name (int_type);
4271 tree load_src = unshare_expr (dest);
4272 /* The load might load some or all bits uninitialized;
4273 avoid -W*uninitialized warnings in that case.
4274 As an optimization, if all the bits were provably
4275 uninitialized (no stores at all yet, or the previous
4276 store was a CLOBBER), it would be nice to optimize away
4277 the load and replace it e.g. with 0. */
4278 TREE_NO_WARNING (load_src) = 1;
4279 stmt = gimple_build_assign (tem, load_src);
4280 gimple_set_location (stmt, loc);
4281 gimple_set_vuse (stmt, new_vuse);
4282 gimple_seq_add_stmt_without_update (&seq, stmt);
4283
4284 /* FIXME: If there is a single chunk of zero bits in mask,
4285 perhaps use BIT_INSERT_EXPR instead? */
4286 stmt = gimple_build_assign (make_ssa_name (int_type),
4287 BIT_AND_EXPR, tem, mask);
4288 gimple_set_location (stmt, loc);
4289 gimple_seq_add_stmt_without_update (&seq, stmt);
4290 tem = gimple_assign_lhs (stmt);
4291
4292 if (TREE_CODE (src) == INTEGER_CST)
4293 src = wide_int_to_tree (int_type,
4294 wi::bit_and_not (wi::to_wide (src),
4295 wi::to_wide (mask)));
4296 else
4297 {
4298 tree nmask
4299 = wide_int_to_tree (int_type,
4300 wi::bit_not (wi::to_wide (mask)));
4301 stmt = gimple_build_assign (make_ssa_name (int_type),
4302 BIT_AND_EXPR, src, nmask);
4303 gimple_set_location (stmt, loc);
4304 gimple_seq_add_stmt_without_update (&seq, stmt);
4305 src = gimple_assign_lhs (stmt);
4306 }
4307 stmt = gimple_build_assign (make_ssa_name (int_type),
4308 BIT_IOR_EXPR, tem, src);
4309 gimple_set_location (stmt, loc);
4310 gimple_seq_add_stmt_without_update (&seq, stmt);
4311 src = gimple_assign_lhs (stmt);
4312 }
4313 }
4314
4315 stmt = gimple_build_assign (dest, src);
4316 gimple_set_location (stmt, loc);
4317 gimple_set_vuse (stmt, new_vuse);
4318 gimple_seq_add_stmt_without_update (&seq, stmt);
4319
4320 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4321 add_stmt_to_eh_lp (stmt, group->lp_nr);
4322
4323 tree new_vdef;
4324 if (i < split_stores.length () - 1)
4325 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4326 else
4327 new_vdef = last_vdef;
4328
4329 gimple_set_vdef (stmt, new_vdef);
4330 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4331 new_vuse = new_vdef;
4332 }
4333
4334 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4335 delete split_store;
4336
4337 gcc_assert (seq);
4338 if (dump_file)
4339 {
4340 fprintf (dump_file,
4341 "New sequence of %u stores to replace old one of %u stores\n",
4342 split_stores.length (), orig_num_stmts);
4343 if (dump_flags & TDF_DETAILS)
4344 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4345 }
4346
4347 if (gimple_clobber_p (group->last_stmt))
4348 update_stmt (group->last_stmt);
4349
4350 if (group->lp_nr > 0)
4351 {
4352 /* We're going to insert a sequence of (potentially) throwing stores
4353 into an active EH region. This means that we're going to create
4354 new basic blocks with EH edges pointing to the post landing pad
4355 and, therefore, to have to update its PHI nodes, if any. For the
4356 virtual PHI node, we're going to use the VDEFs created above, but
4357 for the other nodes, we need to record the original reaching defs. */
4358 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4359 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4360 basic_block last_bb = gimple_bb (group->last_stmt);
4361 edge last_edge = find_edge (last_bb, lp_bb);
4362 auto_vec<tree, 16> last_defs;
4363 gphi_iterator gpi;
4364 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4365 {
4366 gphi *phi = gpi.phi ();
4367 tree last_def;
4368 if (virtual_operand_p (gimple_phi_result (phi)))
4369 last_def = NULL_TREE;
4370 else
4371 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4372 last_defs.safe_push (last_def);
4373 }
4374
4375 /* Do the insertion. Then, if new basic blocks have been created in the
4376 process, rewind the chain of VDEFs created above to walk the new basic
4377 blocks and update the corresponding arguments of the PHI nodes. */
4378 update_modified_stmts (seq);
4379 if (gimple_find_sub_bbs (seq, &last_gsi))
4380 while (last_vdef != gimple_vuse (group->last_stmt))
4381 {
4382 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4383 if (stmt_could_throw_p (cfun, stmt))
4384 {
4385 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4386 unsigned int i;
4387 for (gpi = gsi_start_phis (lp_bb), i = 0;
4388 !gsi_end_p (gpi);
4389 gsi_next (&gpi), i++)
4390 {
4391 gphi *phi = gpi.phi ();
4392 tree new_def;
4393 if (virtual_operand_p (gimple_phi_result (phi)))
4394 new_def = last_vdef;
4395 else
4396 new_def = last_defs[i];
4397 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4398 }
4399 }
4400 last_vdef = gimple_vuse (stmt);
4401 }
4402 }
4403 else
4404 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4405
4406 for (int j = 0; j < 2; ++j)
4407 if (load_seq[j])
4408 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4409
4410 return true;
4411 }
4412
4413 /* Process the merged_store_group objects created in the coalescing phase.
4414 The stores are all against the base object BASE.
4415 Try to output the widened stores and delete the original statements if
4416 successful. Return true iff any changes were made. */
4417
4418 bool
4419 imm_store_chain_info::output_merged_stores ()
4420 {
4421 unsigned int i;
4422 merged_store_group *merged_store;
4423 bool ret = false;
4424 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4425 {
4426 if (dbg_cnt (store_merging)
4427 && output_merged_store (merged_store))
4428 {
4429 unsigned int j;
4430 store_immediate_info *store;
4431 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4432 {
4433 gimple *stmt = store->stmt;
4434 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4435 /* Don't remove clobbers, they are still useful even if
4436 everything is overwritten afterwards. */
4437 if (gimple_clobber_p (stmt))
4438 continue;
4439 gsi_remove (&gsi, true);
4440 if (store->lp_nr)
4441 remove_stmt_from_eh_lp (stmt);
4442 if (stmt != merged_store->last_stmt)
4443 {
4444 unlink_stmt_vdef (stmt);
4445 release_defs (stmt);
4446 }
4447 }
4448 ret = true;
4449 }
4450 }
4451 if (ret && dump_file)
4452 fprintf (dump_file, "Merging successful!\n");
4453
4454 return ret;
4455 }
4456
4457 /* Coalesce the store_immediate_info objects recorded against the base object
4458 BASE in the first phase and output them.
4459 Delete the allocated structures.
4460 Return true if any changes were made. */
4461
4462 bool
4463 imm_store_chain_info::terminate_and_process_chain ()
4464 {
4465 /* Process store chain. */
4466 bool ret = false;
4467 if (m_store_info.length () > 1)
4468 {
4469 ret = coalesce_immediate_stores ();
4470 if (ret)
4471 ret = output_merged_stores ();
4472 }
4473
4474 /* Delete all the entries we allocated ourselves. */
4475 store_immediate_info *info;
4476 unsigned int i;
4477 FOR_EACH_VEC_ELT (m_store_info, i, info)
4478 delete info;
4479
4480 merged_store_group *merged_info;
4481 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4482 delete merged_info;
4483
4484 return ret;
4485 }
4486
4487 /* Return true iff LHS is a destination potentially interesting for
4488 store merging. In practice these are the codes that get_inner_reference
4489 can process. */
4490
4491 static bool
4492 lhs_valid_for_store_merging_p (tree lhs)
4493 {
4494 if (DECL_P (lhs))
4495 return true;
4496
4497 switch (TREE_CODE (lhs))
4498 {
4499 case ARRAY_REF:
4500 case ARRAY_RANGE_REF:
4501 case BIT_FIELD_REF:
4502 case COMPONENT_REF:
4503 case MEM_REF:
4504 return true;
4505 default:
4506 return false;
4507 }
4508
4509 gcc_unreachable ();
4510 }
4511
4512 /* Return true if the tree RHS is a constant we want to consider
4513 during store merging. In practice accept all codes that
4514 native_encode_expr accepts. */
4515
4516 static bool
4517 rhs_valid_for_store_merging_p (tree rhs)
4518 {
4519 unsigned HOST_WIDE_INT size;
4520 if (TREE_CODE (rhs) == CONSTRUCTOR
4521 && CONSTRUCTOR_NELTS (rhs) == 0
4522 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4523 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4524 return true;
4525 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4526 && native_encode_expr (rhs, NULL, size) != 0);
4527 }
4528
4529 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4530 and return true on success or false on failure. */
4531
4532 static bool
4533 adjust_bit_pos (poly_offset_int byte_off,
4534 poly_int64 *pbitpos,
4535 poly_uint64 *pbitregion_start,
4536 poly_uint64 *pbitregion_end)
4537 {
4538 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4539 bit_off += *pbitpos;
4540
4541 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4542 {
4543 if (maybe_ne (*pbitregion_end, 0U))
4544 {
4545 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4546 bit_off += *pbitregion_start;
4547 if (bit_off.to_uhwi (pbitregion_start))
4548 {
4549 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4550 bit_off += *pbitregion_end;
4551 if (!bit_off.to_uhwi (pbitregion_end))
4552 *pbitregion_end = 0;
4553 }
4554 else
4555 *pbitregion_end = 0;
4556 }
4557 return true;
4558 }
4559 else
4560 return false;
4561 }
4562
4563 /* If MEM is a memory reference usable for store merging (either as
4564 store destination or for loads), return the non-NULL base_addr
4565 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4566 Otherwise return NULL; *PBITSIZE should still be valid even in that
4567 case. */
4568
4569 static tree
4570 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4571 poly_uint64 *pbitpos,
4572 poly_uint64 *pbitregion_start,
4573 poly_uint64 *pbitregion_end)
4574 {
4575 poly_int64 bitsize, bitpos;
4576 poly_uint64 bitregion_start = 0, bitregion_end = 0;
4577 machine_mode mode;
4578 int unsignedp = 0, reversep = 0, volatilep = 0;
4579 tree offset;
4580 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4581 &unsignedp, &reversep, &volatilep);
4582 *pbitsize = bitsize;
4583 if (known_le (bitsize, 0))
4584 return NULL_TREE;
4585
4586 if (TREE_CODE (mem) == COMPONENT_REF
4587 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4588 {
4589 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4590 if (maybe_ne (bitregion_end, 0U))
4591 bitregion_end += 1;
4592 }
4593
4594 if (reversep)
4595 return NULL_TREE;
4596
4597 /* We do not want to rewrite TARGET_MEM_REFs. */
4598 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4599 return NULL_TREE;
4600 /* In some cases get_inner_reference may return a
4601 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4602 canonicalize the base_addr to MEM_REF [ptr] and take
4603 byteoffset into account in the bitpos. This occurs in
4604 PR 23684 and this way we can catch more chains. */
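/* For instance, a store to MEM_REF [ptr + 4B] at bit position 8 is
   recorded against base ptr at bit position 40.  */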
4605 else if (TREE_CODE (base_addr) == MEM_REF)
4606 {
4607 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4608 &bitregion_start, &bitregion_end))
4609 return NULL_TREE;
4610 base_addr = TREE_OPERAND (base_addr, 0);
4611 }
4612 /* get_inner_reference returns the base object, get at its
4613 address now. */
4614 else
4615 {
4616 if (maybe_lt (bitpos, 0))
4617 return NULL_TREE;
4618 base_addr = build_fold_addr_expr (base_addr);
4619 }
4620
4621 if (offset)
4622 {
4623 /* If the access has a variable offset then a base decl has to be
4624 address-taken to be able to emit pointer-based stores to it.
4625 ??? We might be able to get away with re-using the original
4626 base up to the first variable part and then wrapping that inside
4627 a BIT_FIELD_REF. */
4628 tree base = get_base_address (base_addr);
4629 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4630 return NULL_TREE;
4631
4632 /* Similarly to above for the base, remove constant from the offset. */
4633 if (TREE_CODE (offset) == PLUS_EXPR
4634 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4635 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4636 &bitpos, &bitregion_start, &bitregion_end))
4637 offset = TREE_OPERAND (offset, 0);
4638
4639 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4640 base_addr, offset);
4641 }
4642
4643 if (known_eq (bitregion_end, 0U))
4644 {
4645 bitregion_start = round_down_to_byte_boundary (bitpos);
4646 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4647 }
4648
4649 *pbitsize = bitsize;
4650 *pbitpos = bitpos;
4651 *pbitregion_start = bitregion_start;
4652 *pbitregion_end = bitregion_end;
4653 return base_addr;
4654 }
4655
4656 /* Return true if STMT is a load that can be used for store merging.
4657 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4658 BITREGION_END are properties of the corresponding store. */
4659
4660 static bool
4661 handled_load (gimple *stmt, store_operand_info *op,
4662 poly_uint64 bitsize, poly_uint64 bitpos,
4663 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4664 {
4665 if (!is_gimple_assign (stmt))
4666 return false;
4667 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4668 {
4669 tree rhs1 = gimple_assign_rhs1 (stmt);
4670 if (TREE_CODE (rhs1) == SSA_NAME
4671 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4672 bitregion_start, bitregion_end))
4673 {
4674 /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4675 been optimized earlier, but if allowed here, would confuse the
4676 multiple uses counting. */
4677 if (op->bit_not_p)
4678 return false;
4679 op->bit_not_p = !op->bit_not_p;
4680 return true;
4681 }
4682 return false;
4683 }
4684 if (gimple_vuse (stmt)
4685 && gimple_assign_load_p (stmt)
4686 && !stmt_can_throw_internal (cfun, stmt)
4687 && !gimple_has_volatile_ops (stmt))
4688 {
4689 tree mem = gimple_assign_rhs1 (stmt);
4690 op->base_addr
4691 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4692 &op->bitregion_start,
4693 &op->bitregion_end);
4694 if (op->base_addr != NULL_TREE
4695 && known_eq (op->bitsize, bitsize)
4696 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4697 && known_ge (op->bitpos - op->bitregion_start,
4698 bitpos - bitregion_start)
4699 && known_ge (op->bitregion_end - op->bitpos,
4700 bitregion_end - bitpos))
4701 {
4702 op->stmt = stmt;
4703 op->val = mem;
4704 op->bit_not_p = false;
4705 return true;
4706 }
4707 }
4708 return false;
4709 }
4710
4711 /* Return the index number of the landing pad for STMT, if any. */
4712
4713 static int
4714 lp_nr_for_store (gimple *stmt)
4715 {
4716 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4717 return 0;
4718
4719 if (!stmt_could_throw_p (cfun, stmt))
4720 return 0;
4721
4722 return lookup_stmt_eh_lp (stmt);
4723 }
4724
4725 /* Record the store STMT for store merging optimization if it can be
4726 optimized. Return true if any changes were made. */
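/* The recorded rhs_code classifies the store: INTEGER_CST for constants
   (including empty CONSTRUCTORs), MEM_REF for plain copies from memory,
   BIT_{AND,IOR,XOR}_EXPR for bitwise operations on a loaded value,
   LROTATE_EXPR for byte-swap candidates detected via find_bswap_or_nop_1
   and BIT_INSERT_EXPR for bit-field-sized stores.  */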
4727
4728 bool
4729 pass_store_merging::process_store (gimple *stmt)
4730 {
4731 tree lhs = gimple_assign_lhs (stmt);
4732 tree rhs = gimple_assign_rhs1 (stmt);
4733 poly_uint64 bitsize, bitpos;
4734 poly_uint64 bitregion_start, bitregion_end;
4735 tree base_addr
4736 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4737 &bitregion_start, &bitregion_end);
4738 if (known_eq (bitsize, 0U))
4739 return false;
4740
4741 bool invalid = (base_addr == NULL_TREE
4742 || (maybe_gt (bitsize,
4743 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
4744 && TREE_CODE (rhs) != INTEGER_CST
4745 && (TREE_CODE (rhs) != CONSTRUCTOR
4746 || CONSTRUCTOR_NELTS (rhs) != 0)));
4747 enum tree_code rhs_code = ERROR_MARK;
4748 bool bit_not_p = false;
4749 struct symbolic_number n;
4750 gimple *ins_stmt = NULL;
4751 store_operand_info ops[2];
4752 if (invalid)
4753 ;
4754 else if (rhs_valid_for_store_merging_p (rhs))
4755 {
4756 rhs_code = INTEGER_CST;
4757 ops[0].val = rhs;
4758 }
4759 else if (TREE_CODE (rhs) != SSA_NAME)
4760 invalid = true;
4761 else
4762 {
4763 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4764 if (!is_gimple_assign (def_stmt))
4765 invalid = true;
4766 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4767 bitregion_start, bitregion_end))
4768 rhs_code = MEM_REF;
4769 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4770 {
4771 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4772 if (TREE_CODE (rhs1) == SSA_NAME
4773 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4774 {
4775 bit_not_p = true;
4776 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4777 }
4778 }
4779
4780 if (rhs_code == ERROR_MARK && !invalid)
4781 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4782 {
4783 case BIT_AND_EXPR:
4784 case BIT_IOR_EXPR:
4785 case BIT_XOR_EXPR:
4786 tree rhs1, rhs2;
4787 rhs1 = gimple_assign_rhs1 (def_stmt);
4788 rhs2 = gimple_assign_rhs2 (def_stmt);
4789 invalid = true;
4790 if (TREE_CODE (rhs1) != SSA_NAME)
4791 break;
4792 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4793 if (!is_gimple_assign (def_stmt1)
4794 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4795 bitregion_start, bitregion_end))
4796 break;
4797 if (rhs_valid_for_store_merging_p (rhs2))
4798 ops[1].val = rhs2;
4799 else if (TREE_CODE (rhs2) != SSA_NAME)
4800 break;
4801 else
4802 {
4803 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4804 if (!is_gimple_assign (def_stmt2))
4805 break;
4806 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4807 bitregion_start, bitregion_end))
4808 break;
4809 }
4810 invalid = false;
4811 break;
4812 default:
4813 invalid = true;
4814 break;
4815 }
4816
4817 unsigned HOST_WIDE_INT const_bitsize;
4818 if (bitsize.is_constant (&const_bitsize)
4819 && (const_bitsize % BITS_PER_UNIT) == 0
4820 && const_bitsize <= 64
4821 && multiple_p (bitpos, BITS_PER_UNIT))
4822 {
4823 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4824 if (ins_stmt)
4825 {
4826 uint64_t nn = n.n;
4827 for (unsigned HOST_WIDE_INT i = 0;
4828 i < const_bitsize;
4829 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4830 if ((nn & MARKER_MASK) == 0
4831 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4832 {
4833 ins_stmt = NULL;
4834 break;
4835 }
4836 if (ins_stmt)
4837 {
4838 if (invalid)
4839 {
4840 rhs_code = LROTATE_EXPR;
4841 ops[0].base_addr = NULL_TREE;
4842 ops[1].base_addr = NULL_TREE;
4843 }
4844 invalid = false;
4845 }
4846 }
4847 }
4848
4849 if (invalid
4850 && bitsize.is_constant (&const_bitsize)
4851 && ((const_bitsize % BITS_PER_UNIT) != 0
4852 || !multiple_p (bitpos, BITS_PER_UNIT))
4853 && const_bitsize <= 64)
4854 {
4855 /* Bypass a conversion to the bit-field type. */
4856 if (!bit_not_p
4857 && is_gimple_assign (def_stmt)
4858 && CONVERT_EXPR_CODE_P (rhs_code))
4859 {
4860 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4861 if (TREE_CODE (rhs1) == SSA_NAME
4862 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
4863 rhs = rhs1;
4864 }
4865 rhs_code = BIT_INSERT_EXPR;
4866 bit_not_p = false;
4867 ops[0].val = rhs;
4868 ops[0].base_addr = NULL_TREE;
4869 ops[1].base_addr = NULL_TREE;
4870 invalid = false;
4871 }
4872 }
4873
4874 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4875 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4876 if (invalid
4877 || !bitsize.is_constant (&const_bitsize)
4878 || !bitpos.is_constant (&const_bitpos)
4879 || !bitregion_start.is_constant (&const_bitregion_start)
4880 || !bitregion_end.is_constant (&const_bitregion_end))
4881 return terminate_all_aliasing_chains (NULL, stmt);
4882
4883 if (!ins_stmt)
4884 memset (&n, 0, sizeof (n));
4885
4886 class imm_store_chain_info **chain_info = NULL;
4887 bool ret = false;
4888 if (base_addr)
4889 chain_info = m_stores.get (base_addr);
4890
4891 store_immediate_info *info;
4892 if (chain_info)
4893 {
4894 unsigned int ord = (*chain_info)->m_store_info.length ();
4895 info = new store_immediate_info (const_bitsize, const_bitpos,
4896 const_bitregion_start,
4897 const_bitregion_end,
4898 stmt, ord, rhs_code, n, ins_stmt,
4899 bit_not_p, lp_nr_for_store (stmt),
4900 ops[0], ops[1]);
4901 if (dump_file && (dump_flags & TDF_DETAILS))
4902 {
4903 fprintf (dump_file, "Recording immediate store from stmt:\n");
4904 print_gimple_stmt (dump_file, stmt, 0);
4905 }
4906 (*chain_info)->m_store_info.safe_push (info);
4907 ret |= terminate_all_aliasing_chains (chain_info, stmt);
4908 /* If we reach the limit of stores to merge in a chain, terminate and
4909 process the chain now. */
4910 if ((*chain_info)->m_store_info.length ()
4911 == (unsigned int) param_max_stores_to_merge)
4912 {
4913 if (dump_file && (dump_flags & TDF_DETAILS))
4914 fprintf (dump_file,
4915 "Reached maximum number of statements to merge:\n");
4916 ret |= terminate_and_process_chain (*chain_info);
4917 }
4918 return ret;
4919 }
4920
4921 /* Store aliases any existing chain? */
4922 ret |= terminate_all_aliasing_chains (NULL, stmt);
4923 /* Start a new chain. */
4924 class imm_store_chain_info *new_chain
4925 = new imm_store_chain_info (m_stores_head, base_addr);
4926 info = new store_immediate_info (const_bitsize, const_bitpos,
4927 const_bitregion_start,
4928 const_bitregion_end,
4929 stmt, 0, rhs_code, n, ins_stmt,
4930 bit_not_p, lp_nr_for_store (stmt),
4931 ops[0], ops[1]);
4932 new_chain->m_store_info.safe_push (info);
4933 m_stores.put (base_addr, new_chain);
4934 if (dump_file && (dump_flags & TDF_DETAILS))
4935 {
4936 fprintf (dump_file, "Starting new chain with statement:\n");
4937 print_gimple_stmt (dump_file, stmt, 0);
4938 fprintf (dump_file, "The base object is:\n");
4939 print_generic_expr (dump_file, base_addr);
4940 fprintf (dump_file, "\n");
4941 }
4942 return ret;
4943 }
4944
4945 /* Return true if STMT is a store valid for store merging. */
4946
4947 static bool
4948 store_valid_for_store_merging_p (gimple *stmt)
4949 {
4950 return gimple_assign_single_p (stmt)
4951 && gimple_vdef (stmt)
4952 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
4953 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
4954 }
4955
4956 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
4957
4958 /* Return the status of basic block BB wrt store merging. */
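/* BB_VALID means the block contains at least two stores suitable for
   merging; BB_EXTENDED_VALID additionally (or instead) means the block
   ends with such a store and falls through to the next block, so open
   chains may be extended across it when non-call exceptions are enabled;
   BB_INVALID means neither.  */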
4959
4960 static enum basic_block_status
4961 get_status_for_store_merging (basic_block bb)
4962 {
4963 unsigned int num_statements = 0;
4964 gimple_stmt_iterator gsi;
4965 edge e;
4966 gimple *last_stmt = NULL;
4967
4968 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4969 {
4970 gimple *stmt = gsi_stmt (gsi);
4971
4972 if (is_gimple_debug (stmt))
4973 continue;
4974
4975 last_stmt = stmt;
4976
4977 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
4978 break;
4979 }
4980
4981 if (num_statements == 0)
4982 return BB_INVALID;
4983
4984 if (cfun->can_throw_non_call_exceptions && cfun->eh
4985 && store_valid_for_store_merging_p (last_stmt)
4986 && (e = find_fallthru_edge (bb->succs))
4987 && e->dest == bb->next_bb)
4988 return BB_EXTENDED_VALID;
4989
4990 return num_statements >= 2 ? BB_VALID : BB_INVALID;
4991 }
4992
4993 /* Entry point for the pass. Go over each basic block recording chains of
4994 immediate stores. Upon encountering a terminating statement (as defined
4995 by stmt_terminates_chain_p) process the recorded stores and emit the widened
4996 variants. */
4997
4998 unsigned int
4999 pass_store_merging::execute (function *fun)
5000 {
5001 basic_block bb;
5002 hash_set<gimple *> orig_stmts;
5003 bool changed = false, open_chains = false;
5004
5005 /* If the function can throw and catch non-call exceptions, we'll be trying
5006 to merge stores across different basic blocks so we need to first unsplit
5007 the EH edges in order to streamline the CFG of the function. */
5008 if (cfun->can_throw_non_call_exceptions && cfun->eh)
5009 unsplit_eh_edges ();
5010
5011 calculate_dominance_info (CDI_DOMINATORS);
5012
5013 FOR_EACH_BB_FN (bb, fun)
5014 {
5015 const basic_block_status bb_status = get_status_for_store_merging (bb);
5016 gimple_stmt_iterator gsi;
5017
5018 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5019 {
5020 changed |= terminate_and_process_all_chains ();
5021 open_chains = false;
5022 }
5023
5024 if (bb_status == BB_INVALID)
5025 continue;
5026
5027 if (dump_file && (dump_flags & TDF_DETAILS))
5028 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5029
5030 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5031 {
5032 gimple *stmt = gsi_stmt (gsi);
5033
5034 if (is_gimple_debug (stmt))
5035 continue;
5036
5037 if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5038 {
5039 /* Terminate all chains. */
5040 if (dump_file && (dump_flags & TDF_DETAILS))
5041 fprintf (dump_file, "Volatile access terminates "
5042 "all chains\n");
5043 changed |= terminate_and_process_all_chains ();
5044 open_chains = false;
5045 continue;
5046 }
5047
5048 if (store_valid_for_store_merging_p (stmt))
5049 changed |= process_store (stmt);
5050 else
5051 changed |= terminate_all_aliasing_chains (NULL, stmt);
5052 }
5053
5054 if (bb_status == BB_EXTENDED_VALID)
5055 open_chains = true;
5056 else
5057 {
5058 changed |= terminate_and_process_all_chains ();
5059 open_chains = false;
5060 }
5061 }
5062
5063 if (open_chains)
5064 changed |= terminate_and_process_all_chains ();
5065
5066 /* If the function can throw and catch non-call exceptions and something
5067 changed during the pass, then the CFG has (very likely) changed too. */
5068 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5069 {
5070 free_dominance_info (CDI_DOMINATORS);
5071 return TODO_cleanup_cfg;
5072 }
5073
5074 return 0;
5075 }
5076
5077 } // anon namespace
5078
5079 /* Construct and return a store merging pass object. */
5080
5081 gimple_opt_pass *
5082 make_pass_store_merging (gcc::context *ctxt)
5083 {
5084 return new pass_store_merging (ctxt);
5085 }
5086
5087 #if CHECKING_P
5088
5089 namespace selftest {
5090
5091 /* Selftests for store merging helpers. */
5092
5093 /* Assert that all elements of the byte arrays X and Y, both of length N
5094 are equal. */
5095
5096 static void
5097 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5098 {
5099 for (unsigned int i = 0; i < n; i++)
5100 {
5101 if (x[i] != y[i])
5102 {
5103 fprintf (stderr, "Arrays do not match. X:\n");
5104 dump_char_array (stderr, x, n);
5105 fprintf (stderr, "Y:\n");
5106 dump_char_array (stderr, y, n);
5107 }
5108 ASSERT_EQ (x[i], y[i]);
5109 }
5110 }
5111
5112 /* Test shift_bytes_in_array_left and check that it carries bits across
5113 bytes correctly. */
5114
5115 static void
5116 verify_shift_bytes_in_array_left (void)
5117 {
5118 /* byte 1 | byte 0
5119 00011111 | 11100000. */
5120 unsigned char orig[2] = { 0xe0, 0x1f };
5121 unsigned char in[2];
5122 memcpy (in, orig, sizeof orig);
5123
5124 unsigned char expected[2] = { 0x80, 0x7f };
5125 shift_bytes_in_array_left (in, sizeof (in), 2);
5126 verify_array_eq (in, expected, sizeof (in));
5127
5128 memcpy (in, orig, sizeof orig);
5129 memcpy (expected, orig, sizeof orig);
5130 /* Check that shifting by zero doesn't change anything. */
5131 shift_bytes_in_array_left (in, sizeof (in), 0);
5132 verify_array_eq (in, expected, sizeof (in));
5133
5134 }
5135
5136 /* Test shift_bytes_in_array_right and check that it carries bits across
5137 bytes correctly. */
5138
5139 static void
5140 verify_shift_bytes_in_array_right (void)
5141 {
5142 /* byte 1 | byte 0
5143 00011111 | 11100000. */
5144 unsigned char orig[2] = { 0x1f, 0xe0};
5145 unsigned char in[2];
5146 memcpy (in, orig, sizeof orig);
5147 unsigned char expected[2] = { 0x07, 0xf8};
5148 shift_bytes_in_array_right (in, sizeof (in), 2);
5149 verify_array_eq (in, expected, sizeof (in));
5150
5151 memcpy (in, orig, sizeof orig);
5152 memcpy (expected, orig, sizeof orig);
5153 /* Check that shifting by zero doesn't change anything. */
5154 shift_bytes_in_array_right (in, sizeof (in), 0);
5155 verify_array_eq (in, expected, sizeof (in));
5156 }
5157
5158 /* Test that clear_bit_region clears exactly the bits asked and
5159 nothing more. */
5160
5161 static void
5162 verify_clear_bit_region (void)
5163 {
5164 /* Start with all bits set and test clearing various patterns in them. */
5165 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5166 unsigned char in[3];
5167 unsigned char expected[3];
5168 memcpy (in, orig, sizeof in);
5169
5170 /* Check zeroing out all the bits. */
5171 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5172 expected[0] = expected[1] = expected[2] = 0;
5173 verify_array_eq (in, expected, sizeof in);
5174
5175 memcpy (in, orig, sizeof in);
5176 /* Leave the first and last bits intact. */
5177 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5178 expected[0] = 0x1;
5179 expected[1] = 0;
5180 expected[2] = 0x80;
5181 verify_array_eq (in, expected, sizeof in);
5182 }
5183
5184 /* Test that clear_bit_region_be clears exactly the bits asked and
5185 nothing more. */
5186
5187 static void
5188 verify_clear_bit_region_be (void)
5189 {
5190 /* Start with all bits set and test clearing various patterns in them. */
5191 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5192 unsigned char in[3];
5193 unsigned char expected[3];
5194 memcpy (in, orig, sizeof in);
5195
5196 /* Check zeroing out all the bits. */
5197 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5198 expected[0] = expected[1] = expected[2] = 0;
5199 verify_array_eq (in, expected, sizeof in);
5200
5201 memcpy (in, orig, sizeof in);
5202 /* Leave the first and last bits intact. */
5203 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5204 expected[0] = 0x80;
5205 expected[1] = 0;
5206 expected[2] = 0x1;
5207 verify_array_eq (in, expected, sizeof in);
5208 }
5209
5210
5211 /* Run all of the selftests within this file. */
5212
5213 void
5214 store_merging_c_tests (void)
5215 {
5216 verify_shift_bytes_in_array_left ();
5217 verify_shift_bytes_in_array_right ();
5218 verify_clear_bit_region ();
5219 verify_clear_bit_region_be ();
5220 }
5221
5222 } // namespace selftest
5223 #endif /* CHECKING_P. */
5224