; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O0 %s
; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake | FileCheck --check-prefixes=CHECK,CHECK-O3 %s

define i8 @load_i8(ptr %ptr) {
; CHECK-O0-LABEL: load_i8:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movb (%rdi), %al
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i8:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movzbl (%rdi), %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i8, ptr %ptr unordered, align 1
  ret i8 %v
}

define void @store_i8(ptr %ptr, i8 %v) {
; CHECK-O0-LABEL: store_i8:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movb %sil, %al
; CHECK-O0-NEXT:    movb %al, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: store_i8:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movb %sil, (%rdi)
; CHECK-O3-NEXT:    retq
  store atomic i8 %v, ptr %ptr unordered, align 1
  ret void
}

define i16 @load_i16(ptr %ptr) {
; CHECK-O0-LABEL: load_i16:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movw (%rdi), %ax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i16:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movzwl (%rdi), %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i16, ptr %ptr unordered, align 2
  ret i16 %v
}


define void @store_i16(ptr %ptr, i16 %v) {
; CHECK-O0-LABEL: store_i16:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movw %si, %ax
; CHECK-O0-NEXT:    movw %ax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: store_i16:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movw %si, (%rdi)
; CHECK-O3-NEXT:    retq
  store atomic i16 %v, ptr %ptr unordered, align 2
  ret void
}

define i32 @load_i32(ptr %ptr) {
; CHECK-LABEL: load_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl (%rdi), %eax
; CHECK-NEXT:    retq
  %v = load atomic i32, ptr %ptr unordered, align 4
  ret i32 %v
}

define void @store_i32(ptr %ptr, i32 %v) {
; CHECK-LABEL: store_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, (%rdi)
; CHECK-NEXT:    retq
  store atomic i32 %v, ptr %ptr unordered, align 4
  ret void
}

define i64 @load_i64(ptr %ptr) {
; CHECK-LABEL: load_i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %ptr unordered, align 8
  ret i64 %v
}

define void @store_i64(ptr %ptr, i64 %v) {
; CHECK-LABEL: store_i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, (%rdi)
; CHECK-NEXT:    retq
  store atomic i64 %v, ptr %ptr unordered, align 8
  ret void
}

;; The tests in the rest of this file are intended to show transforms which we
;; either *can't* do for legality reasons, or don't currently implement.  The
;; latter are noted carefully where relevant.

;; Start w/some clearly illegal ones.
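;; For example, in narrow_writeback_or below it might look profitable to
;; narrow the store to something like 'orb $7, (%rdi)', since only the low
;; byte changes.  It is not: if another thread stores a full i64 between
;; our load and the narrowed store, a reader can then observe a mix of the
;; two values -- a torn i64 that no single atomic store produced.  Hence
;; the full-width orq in the expected output.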

; Must use a full width op, not a byte op
define void @narrow_writeback_or(ptr %ptr) {
; CHECK-O0-LABEL: narrow_writeback_or:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    orq $7, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: narrow_writeback_or:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    orq $7, (%rdi)
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %ptr unordered, align 8
  %v.new = or i64 %v, 7
  store atomic i64 %v.new, ptr %ptr unordered, align 8
  ret void
}

; Must use a full width op, not a byte op
define void @narrow_writeback_and(ptr %ptr) {
; CHECK-O0-LABEL: narrow_writeback_and:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT:    andl $-256, %eax
; CHECK-O0-NEXT:    # kill: def $rax killed $eax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: narrow_writeback_and:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movl $4294967040, %eax # imm = 0xFFFFFF00
; CHECK-O3-NEXT:    andq %rax, (%rdi)
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %ptr unordered, align 8
  %v.new = and i64 %v, 4294967040 ;; 0xFFFF_FF00
  store atomic i64 %v.new, ptr %ptr unordered, align 8
  ret void
}

; Must use a full width op, not a byte op
define void @narrow_writeback_xor(ptr %ptr) {
; CHECK-O0-LABEL: narrow_writeback_xor:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorq $7, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: narrow_writeback_xor:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    xorq $7, (%rdi)
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %ptr unordered, align 8
  %v.new = xor i64 %v, 7
  store atomic i64 %v.new, ptr %ptr unordered, align 8
  ret void
}

;; The next batch of tests exercises cases where store widening would
;; improve code generation.  Note that widening is only legal if the
;; resulting type would be atomic.  Each test has a well-aligned and an
;; unaligned variant to ensure we get correct codegen here.
;; Note: It's not a legality issue, but there's a gotcha here to be aware
;; of.  Once we widen a pair of atomic stores, we lose the information
;; that the original atomicity requirement was half the width.  Given
;; that, we can't then split the store again.  This challenges our usual
;; iterative approach to incremental improvement.
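;; In IR terms, the widening below would be (a sketch of the TODO
;; transform, not what we emit today):
;;   store atomic i32 %v1, ptr %p0 unordered, align 8
;;   store atomic i32 %v2, ptr %p1 unordered, align 4
;; -->
;;   store atomic i64 %merged, ptr %p0 unordered, align 8  ; %v2:%v1
;; This is sound only because the 8-byte-aligned i64 store is still a
;; single atomic access.  Once merged, the store can't later be split
;; back into two i32 stores, since its atomicity is now i64-wide.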

; Legal if wider type is also atomic (TODO)
define void @widen_store(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, (%rdi)
; CHECK-NEXT:    movl %edx, 4(%rdi)
; CHECK-NEXT:    retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 8
  store atomic i32 %v2, ptr %p1 unordered, align 4
  ret void
}

; This one is *NOT* legal to widen.  With weaker alignment,
; the wider type might cross a cache line and violate the
; atomicity requirement.
define void @widen_store_unaligned(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_store_unaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, (%rdi)
; CHECK-NEXT:    movl %edx, 4(%rdi)
; CHECK-NEXT:    retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 4
  store atomic i32 %v2, ptr %p1 unordered, align 4
  ret void
}

; Legal if wider type is also atomic (TODO)
define void @widen_broadcast(ptr %p0, i32 %v) {
; CHECK-LABEL: widen_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, (%rdi)
; CHECK-NEXT:    movl %esi, 4(%rdi)
; CHECK-NEXT:    retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v, ptr %p0 unordered, align 8
  store atomic i32 %v, ptr %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @widen_broadcast_unaligned(ptr %p0, i32 %v) {
; CHECK-LABEL: widen_broadcast_unaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, (%rdi)
; CHECK-NEXT:    movl %esi, 4(%rdi)
; CHECK-NEXT:    retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v, ptr %p0 unordered, align 4
  store atomic i32 %v, ptr %p1 unordered, align 4
  ret void
}

define i128 @load_i128(ptr %ptr) {
; CHECK-LABEL: load_i128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-NEXT:    vmovq %xmm0, %rax
; CHECK-NEXT:    vpextrq $1, %xmm0, %rdx
; CHECK-NEXT:    retq
  %v = load atomic i128, ptr %ptr unordered, align 16
  ret i128 %v
}

define void @store_i128(ptr %ptr, i128 %v) {
; CHECK-O0-LABEL: store_i128:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    vmovq %rsi, %xmm0
; CHECK-O0-NEXT:    vmovq %rdx, %xmm1
; CHECK-O0-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-O0-NEXT:    vmovdqa %xmm0, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: store_i128:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    vmovq %rdx, %xmm0
; CHECK-O3-NEXT:    vmovq %rsi, %xmm1
; CHECK-O3-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-O3-NEXT:    vmovdqa %xmm0, (%rdi)
; CHECK-O3-NEXT:    retq
  store atomic i128 %v, ptr %ptr unordered, align 16
  ret void
}

define i256 @load_i256(ptr %ptr) {
; CHECK-O0-LABEL: load_i256:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    subq $56, %rsp
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 64
; CHECK-O0-NEXT:    movq %rdi, %rax
; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT:    movq %rdi, (%rsp) # 8-byte Spill
; CHECK-O0-NEXT:    movl $32, %edi
; CHECK-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    callq __atomic_load@PLT
; CHECK-O0-NEXT:    movq (%rsp), %rdi # 8-byte Reload
; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-O0-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; CHECK-O0-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; CHECK-O0-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
; CHECK-O0-NEXT:    movq {{[0-9]+}}(%rsp), %r8
; CHECK-O0-NEXT:    movq %r8, 24(%rdi)
; CHECK-O0-NEXT:    movq %rsi, 16(%rdi)
; CHECK-O0-NEXT:    movq %rdx, 8(%rdi)
; CHECK-O0-NEXT:    movq %rcx, (%rdi)
; CHECK-O0-NEXT:    addq $56, %rsp
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i256:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    pushq %rbx
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
; CHECK-O3-NEXT:    subq $32, %rsp
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 48
; CHECK-O3-NEXT:    .cfi_offset %rbx, -16
; CHECK-O3-NEXT:    movq %rdi, %rbx
; CHECK-O3-NEXT:    movq %rsp, %rdx
; CHECK-O3-NEXT:    movl $32, %edi
; CHECK-O3-NEXT:    xorl %ecx, %ecx
; CHECK-O3-NEXT:    callq __atomic_load@PLT
; CHECK-O3-NEXT:    vmovups (%rsp), %ymm0
; CHECK-O3-NEXT:    vmovups %ymm0, (%rbx)
; CHECK-O3-NEXT:    movq %rbx, %rax
; CHECK-O3-NEXT:    addq $32, %rsp
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 16
; CHECK-O3-NEXT:    popq %rbx
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O3-NEXT:    vzeroupper
; CHECK-O3-NEXT:    retq
  %v = load atomic i256, ptr %ptr unordered, align 16
  ret i256 %v
}

define void @store_i256(ptr %ptr, i256 %v) {
; CHECK-O0-LABEL: store_i256:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    subq $40, %rsp
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 48
; CHECK-O0-NEXT:    movq %rsi, %rax
; CHECK-O0-NEXT:    movq %rdi, %rsi
; CHECK-O0-NEXT:    movq %rax, (%rsp)
; CHECK-O0-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
; CHECK-O0-NEXT:    movl $32, %edi
; CHECK-O0-NEXT:    movq %rsp, %rdx
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    callq __atomic_store@PLT
; CHECK-O0-NEXT:    addq $40, %rsp
; CHECK-O0-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: store_i256:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    subq $40, %rsp
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 48
; CHECK-O3-NEXT:    movq %rdi, %rax
; CHECK-O3-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
; CHECK-O3-NEXT:    movq %rsi, (%rsp)
; CHECK-O3-NEXT:    movq %rsp, %rdx
; CHECK-O3-NEXT:    movl $32, %edi
; CHECK-O3-NEXT:    movq %rax, %rsi
; CHECK-O3-NEXT:    xorl %ecx, %ecx
; CHECK-O3-NEXT:    callq __atomic_store@PLT
; CHECK-O3-NEXT:    addq $40, %rsp
; CHECK-O3-NEXT:    .cfi_def_cfa_offset 8
; CHECK-O3-NEXT:    retq
  store atomic i256 %v, ptr %ptr unordered, align 16
  ret void
}

; Legal if wider type is also atomic (TODO)
define void @vec_store(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: vec_store:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    vmovd %xmm0, %ecx
; CHECK-O0-NEXT:    vpextrd $1, %xmm0, %eax
; CHECK-O0-NEXT:    movl %ecx, (%rdi)
; CHECK-O0-NEXT:    movl %eax, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: vec_store:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    vmovd %xmm0, %eax
; CHECK-O3-NEXT:    vpextrd $1, %xmm0, %ecx
; CHECK-O3-NEXT:    movl %eax, (%rdi)
; CHECK-O3-NEXT:    movl %ecx, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %v2 = extractelement <2 x i32> %vec, i32 1
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 8
  store atomic i32 %v2, ptr %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @vec_store_unaligned(ptr %p0, <2 x i32> %vec) {
; CHECK-O0-LABEL: vec_store_unaligned:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    vmovd %xmm0, %ecx
; CHECK-O0-NEXT:    vpextrd $1, %xmm0, %eax
; CHECK-O0-NEXT:    movl %ecx, (%rdi)
; CHECK-O0-NEXT:    movl %eax, 4(%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: vec_store_unaligned:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    vmovd %xmm0, %eax
; CHECK-O3-NEXT:    vpextrd $1, %xmm0, %ecx
; CHECK-O3-NEXT:    movl %eax, (%rdi)
; CHECK-O3-NEXT:    movl %ecx, 4(%rdi)
; CHECK-O3-NEXT:    retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %v2 = extractelement <2 x i32> %vec, i32 1
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 4
  store atomic i32 %v2, ptr %p1 unordered, align 4
  ret void
}



; Legal if wider type is also atomic (TODO)
; Also, can avoid register move from xmm to eax (TODO)
define void @widen_broadcast2(ptr %p0, <2 x i32> %vec) {
; CHECK-LABEL: widen_broadcast2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovd %xmm0, %eax
; CHECK-NEXT:    movl %eax, (%rdi)
; CHECK-NEXT:    movl %eax, 4(%rdi)
; CHECK-NEXT:    retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 8
  store atomic i32 %v1, ptr %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @widen_broadcast2_unaligned(ptr %p0, <2 x i32> %vec) {
; CHECK-LABEL: widen_broadcast2_unaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovd %xmm0, %eax
; CHECK-NEXT:    movl %eax, (%rdi)
; CHECK-NEXT:    movl %eax, 4(%rdi)
; CHECK-NEXT:    retq
  %v1 = extractelement <2 x i32> %vec, i32 0
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 %v1, ptr %p0 unordered, align 4
  store atomic i32 %v1, ptr %p1 unordered, align 4
  ret void
}

; Legal if wider type is also atomic (TODO)
define void @widen_zero_init(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_zero_init:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl $0, (%rdi)
; CHECK-NEXT:    movl $0, 4(%rdi)
; CHECK-NEXT:    retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 0, ptr %p0 unordered, align 8
  store atomic i32 0, ptr %p1 unordered, align 4
  ret void
}

; Not legal to widen due to alignment restriction
define void @widen_zero_init_unaligned(ptr %p0, i32 %v1, i32 %v2) {
; CHECK-LABEL: widen_zero_init_unaligned:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl $0, (%rdi)
; CHECK-NEXT:    movl $0, 4(%rdi)
; CHECK-NEXT:    retq
  %p1 = getelementptr i32, ptr %p0, i64 1
  store atomic i32 0, ptr %p0 unordered, align 4
  store atomic i32 0, ptr %p1 unordered, align 4
  ret void
}

;; The next batch of tests stresses load folding.  Folding is legal
;; on x86, so these simply check optimization quality.
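;; "Folding" here means the atomic load becomes the memory operand of its
;; user, as in load_fold_add2 below: instead of a separate
;; 'movq (%rdi), %rcx' followed by 'addq %rcx, %rax', we emit
;; 'addq (%rdi), %rax'.  The folded form still performs the aligned 8-byte
;; read as a single memory access, so the load stays atomic.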

; Legal, as expected
define i64 @load_fold_add1(ptr %p) {
; CHECK-LABEL: load_fold_add1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    addq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = add i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_add2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_add2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    addq (%rdi), %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = add i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_add3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_add3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    addq (%rsi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_add3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    addq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = add i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_sub1(ptr %p) {
; CHECK-O0-LABEL: load_fold_sub1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    subq $15, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_sub1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    addq $-15, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = sub i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_sub2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_sub2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    subq %rsi, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = sub i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_sub3(ptr %p1, ptr %p2) {
; CHECK-LABEL: load_fold_sub3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    subq (%rsi), %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = sub i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_mul1(ptr %p) {
; CHECK-O0-LABEL: load_fold_mul1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    imulq $15, (%rdi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_mul1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,4), %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = mul i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_mul2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_mul2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    imulq (%rdi), %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = mul i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_mul3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_mul3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    imulq (%rsi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_mul3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    imulq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = mul i64 %v, %v2
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_sdiv1(ptr %p) {
; CHECK-O0-LABEL: load_fold_sdiv1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movl $15, %ecx
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq %rcx
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_sdiv1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rcx
; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT:    movq %rcx, %rax
; CHECK-O3-NEXT:    imulq %rdx
; CHECK-O3-NEXT:    addq %rdx, %rcx
; CHECK-O3-NEXT:    movq %rcx, %rax
; CHECK-O3-NEXT:    shrq $63, %rax
; CHECK-O3-NEXT:    sarq $3, %rcx
; CHECK-O3-NEXT:    addq %rax, %rcx
; CHECK-O3-NEXT:    movq %rcx, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = sdiv i64 %v, 15
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_sdiv2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_sdiv2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq %rsi
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_sdiv2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB35_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    cqto
; CHECK-O3-NEXT:    idivq %rsi
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB35_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = sdiv i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_sdiv3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_sdiv3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq (%rsi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_sdiv3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq (%rsi), %rcx
; CHECK-O3-NEXT:    movq %rax, %rdx
; CHECK-O3-NEXT:    orq %rcx, %rdx
; CHECK-O3-NEXT:    shrq $32, %rdx
; CHECK-O3-NEXT:    je .LBB36_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    cqto
; CHECK-O3-NEXT:    idivq %rcx
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB36_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %ecx
; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = sdiv i64 %v, %v2
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_udiv1(ptr %p) {
; CHECK-O0-LABEL: load_fold_udiv1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movl $15, %ecx
; CHECK-O0-NEXT:    xorl %edx, %edx
; CHECK-O0-NEXT:    # kill: def $rdx killed $edx
; CHECK-O0-NEXT:    divq %rcx
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_udiv1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rdx
; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
; CHECK-O3-NEXT:    mulxq %rax, %rax, %rax
; CHECK-O3-NEXT:    shrq $3, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = udiv i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_udiv2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_udiv2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    divq %rsi
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_udiv2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB38_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divq %rsi
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB38_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = udiv i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_udiv3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_udiv3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    divq (%rsi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_udiv3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq (%rsi), %rcx
; CHECK-O3-NEXT:    movq %rax, %rdx
; CHECK-O3-NEXT:    orq %rcx, %rdx
; CHECK-O3-NEXT:    shrq $32, %rdx
; CHECK-O3-NEXT:    je .LBB39_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divq %rcx
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB39_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %ecx
; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = udiv i64 %v, %v2
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_srem1(ptr %p) {
; CHECK-O0-LABEL: load_fold_srem1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movl $15, %ecx
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq %rcx
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_srem1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rcx
; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
; CHECK-O3-NEXT:    movq %rcx, %rax
; CHECK-O3-NEXT:    imulq %rdx
; CHECK-O3-NEXT:    addq %rcx, %rdx
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    shrq $63, %rax
; CHECK-O3-NEXT:    sarq $3, %rdx
; CHECK-O3-NEXT:    addq %rax, %rdx
; CHECK-O3-NEXT:    leaq (%rdx,%rdx,4), %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT:    subq %rax, %rcx
; CHECK-O3-NEXT:    movq %rcx, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = srem i64 %v, 15
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_srem2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_srem2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq %rsi
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_srem2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB41_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    cqto
; CHECK-O3-NEXT:    idivq %rsi
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB41_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    movl %edx, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = srem i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_srem3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_srem3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq (%rsi)
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_srem3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq (%rsi), %rcx
; CHECK-O3-NEXT:    movq %rax, %rdx
; CHECK-O3-NEXT:    orq %rcx, %rdx
; CHECK-O3-NEXT:    shrq $32, %rdx
; CHECK-O3-NEXT:    je .LBB42_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    cqto
; CHECK-O3-NEXT:    idivq %rcx
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB42_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %ecx
; CHECK-O3-NEXT:    movl %edx, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = srem i64 %v, %v2
  ret i64 %ret
}

; Legal to fold (TODO)
define i64 @load_fold_urem1(ptr %p) {
; CHECK-O0-LABEL: load_fold_urem1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movl $15, %ecx
; CHECK-O0-NEXT:    xorl %edx, %edx
; CHECK-O0-NEXT:    # kill: def $rdx killed $edx
; CHECK-O0-NEXT:    divq %rcx
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_urem1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O3-NEXT:    movq %rax, %rdx
; CHECK-O3-NEXT:    mulxq %rcx, %rcx, %rcx
; CHECK-O3-NEXT:    shrq $3, %rcx
; CHECK-O3-NEXT:    leaq (%rcx,%rcx,4), %rcx
; CHECK-O3-NEXT:    leaq (%rcx,%rcx,2), %rcx
; CHECK-O3-NEXT:    subq %rcx, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = urem i64 %v, 15
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_urem2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_urem2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    divq %rsi
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_urem2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB44_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divq %rsi
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB44_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    movl %edx, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = urem i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_urem3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_urem3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    divq (%rsi)
; CHECK-O0-NEXT:    movq %rdx, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_urem3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq (%rsi), %rcx
; CHECK-O3-NEXT:    movq %rax, %rdx
; CHECK-O3-NEXT:    orq %rcx, %rdx
; CHECK-O3-NEXT:    shrq $32, %rdx
; CHECK-O3-NEXT:    je .LBB45_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divq %rcx
; CHECK-O3-NEXT:    movq %rdx, %rax
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB45_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %ecx
; CHECK-O3-NEXT:    movl %edx, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = urem i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_shl1(ptr %p) {
; CHECK-LABEL: load_fold_shl1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    shlq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = shl i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_shl2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_shl2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq %rsi, %rcx
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    shlq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_shl2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    shlxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = shl i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_shl3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_shl3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq (%rsi), %rcx
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    shlq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_shl3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    shlxq %rax, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = shl i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_lshr1(ptr %p) {
; CHECK-LABEL: load_fold_lshr1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    shrq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = lshr i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_lshr2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_lshr2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq %rsi, %rcx
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    shrq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_lshr2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    shrxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = lshr i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_lshr3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_lshr3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq (%rsi), %rcx
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    shrq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_lshr3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    shrxq %rax, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = lshr i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_ashr1(ptr %p) {
; CHECK-LABEL: load_fold_ashr1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    sarq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = ashr i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_ashr2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_ashr2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq %rsi, %rcx
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    sarq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_ashr2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    sarxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = ashr i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_ashr3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_ashr3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq (%rsi), %rcx
; CHECK-O0-NEXT:    # kill: def $cl killed $rcx
; CHECK-O0-NEXT:    sarq %cl, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_ashr3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    sarxq %rax, (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = ashr i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_and1(ptr %p) {
; CHECK-O0-LABEL: load_fold_and1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    andq $15, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_and1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    andl $15, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = and i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_and2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_and2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    andq (%rdi), %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = and i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_and3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_and3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    andq (%rsi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_and3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    andq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = and i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_or1(ptr %p) {
; CHECK-LABEL: load_fold_or1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    orq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = or i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_or2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_or2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    orq (%rdi), %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = or i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_or3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_or3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    orq (%rsi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_or3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    orq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = or i64 %v, %v2
  ret i64 %ret
}

; Legal, as expected
define i64 @load_fold_xor1(ptr %p) {
; CHECK-LABEL: load_fold_xor1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    xorq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = xor i64 %v, 15
  ret i64 %ret
}

define i64 @load_fold_xor2(ptr %p, i64 %v2) {
; CHECK-LABEL: load_fold_xor2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    xorq (%rdi), %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = xor i64 %v, %v2
  ret i64 %ret
}

define i64 @load_fold_xor3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_xor3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorq (%rsi), %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_xor3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    xorq (%rdi), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = xor i64 %v, %v2
  ret i64 %ret
}

define i1 @load_fold_icmp1(ptr %p) {
; CHECK-O0-LABEL: load_fold_icmp1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    subq $15, %rax
; CHECK-O0-NEXT:    sete %al
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_icmp1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    cmpq $15, (%rdi)
; CHECK-O3-NEXT:    sete %al
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = icmp eq i64 %v, 15
  ret i1 %ret
}

define i1 @load_fold_icmp2(ptr %p, i64 %v2) {
; CHECK-O0-LABEL: load_fold_icmp2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    subq %rsi, %rax
; CHECK-O0-NEXT:    sete %al
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_icmp2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    cmpq %rsi, (%rdi)
; CHECK-O3-NEXT:    sete %al
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = icmp eq i64 %v, %v2
  ret i1 %ret
}

define i1 @load_fold_icmp3(ptr %p1, ptr %p2) {
; CHECK-O0-LABEL: load_fold_icmp3:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movq (%rsi), %rcx
; CHECK-O0-NEXT:    subq %rcx, %rax
; CHECK-O0-NEXT:    sete %al
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_fold_icmp3:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rsi), %rax
; CHECK-O3-NEXT:    cmpq %rax, (%rdi)
; CHECK-O3-NEXT:    sete %al
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p1 unordered, align 8
  %v2 = load atomic i64, ptr %p2 unordered, align 8
  %ret = icmp eq i64 %v, %v2
  ret i1 %ret
}


;; The next batch of tests checks for read-modify-write patterns.
;; Legally, it's okay to use a memory operand here as long as the operand
;; is well aligned (i.e. doesn't cross a cache line boundary).  We are
;; required not to narrow the store though!
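;; For example, the folded 'addq $15, (%rdi)' emitted for rmw_fold_add1
;; below is a plain (non-locked) instruction: it performs one atomic
;; 8-byte load and one atomic 8-byte store.  The load/store pair is not
;; atomic as a unit, but unordered only requires that each individual
;; access be atomic, so this is sufficient.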
1308
1309; Legal, as expected
1310define void @rmw_fold_add1(ptr %p, i64 %v) {
1311; CHECK-O0-LABEL: rmw_fold_add1:
1312; CHECK-O0:       # %bb.0:
1313; CHECK-O0-NEXT:    movq (%rdi), %rax
1314; CHECK-O0-NEXT:    addq $15, %rax
1315; CHECK-O0-NEXT:    movq %rax, (%rdi)
1316; CHECK-O0-NEXT:    retq
1317;
1318; CHECK-O3-LABEL: rmw_fold_add1:
1319; CHECK-O3:       # %bb.0:
1320; CHECK-O3-NEXT:    addq $15, (%rdi)
1321; CHECK-O3-NEXT:    retq
1322  %prev = load atomic i64, ptr %p unordered, align 8
1323  %val = add i64 %prev, 15
1324  store atomic i64 %val, ptr %p unordered, align 8
1325  ret void
1326}
1327
1328; Legal, as expected
1329define void @rmw_fold_add2(ptr %p, i64 %v) {
1330; CHECK-O0-LABEL: rmw_fold_add2:
1331; CHECK-O0:       # %bb.0:
1332; CHECK-O0-NEXT:    movq (%rdi), %rax
1333; CHECK-O0-NEXT:    addq %rsi, %rax
1334; CHECK-O0-NEXT:    movq %rax, (%rdi)
1335; CHECK-O0-NEXT:    retq
1336;
1337; CHECK-O3-LABEL: rmw_fold_add2:
1338; CHECK-O3:       # %bb.0:
1339; CHECK-O3-NEXT:    addq %rsi, (%rdi)
1340; CHECK-O3-NEXT:    retq
1341  %prev = load atomic i64, ptr %p unordered, align 8
1342  %val = add i64 %prev, %v
1343  store atomic i64 %val, ptr %p unordered, align 8
1344  ret void
1345}
1346
1347; Legal, as expected
1348define void @rmw_fold_sub1(ptr %p, i64 %v) {
1349; CHECK-O0-LABEL: rmw_fold_sub1:
1350; CHECK-O0:       # %bb.0:
1351; CHECK-O0-NEXT:    movq (%rdi), %rax
1352; CHECK-O0-NEXT:    addq $-15, %rax
1353; CHECK-O0-NEXT:    movq %rax, (%rdi)
1354; CHECK-O0-NEXT:    retq
1355;
1356; CHECK-O3-LABEL: rmw_fold_sub1:
1357; CHECK-O3:       # %bb.0:
1358; CHECK-O3-NEXT:    addq $-15, (%rdi)
1359; CHECK-O3-NEXT:    retq
1360  %prev = load atomic i64, ptr %p unordered, align 8
1361  %val = sub i64 %prev, 15
1362  store atomic i64 %val, ptr %p unordered, align 8
1363  ret void
1364}
1365
1366; Legal, as expected
1367define void @rmw_fold_sub2(ptr %p, i64 %v) {
1368; CHECK-O0-LABEL: rmw_fold_sub2:
1369; CHECK-O0:       # %bb.0:
1370; CHECK-O0-NEXT:    movq (%rdi), %rax
1371; CHECK-O0-NEXT:    subq %rsi, %rax
1372; CHECK-O0-NEXT:    movq %rax, (%rdi)
1373; CHECK-O0-NEXT:    retq
1374;
1375; CHECK-O3-LABEL: rmw_fold_sub2:
1376; CHECK-O3:       # %bb.0:
1377; CHECK-O3-NEXT:    subq %rsi, (%rdi)
1378; CHECK-O3-NEXT:    retq
1379  %prev = load atomic i64, ptr %p unordered, align 8
1380  %val = sub i64 %prev, %v
1381  store atomic i64 %val, ptr %p unordered, align 8
1382  ret void
1383}
1384
1385; Legal, as expected
1386define void @rmw_fold_mul1(ptr %p, i64 %v) {
1387; CHECK-LABEL: rmw_fold_mul1:
1388; CHECK:       # %bb.0:
1389; CHECK-NEXT:    movq (%rdi), %rax
1390; CHECK-NEXT:    leaq (%rax,%rax,4), %rax
1391; CHECK-NEXT:    leaq (%rax,%rax,2), %rax
1392; CHECK-NEXT:    movq %rax, (%rdi)
1393; CHECK-NEXT:    retq
1394  %prev = load atomic i64, ptr %p unordered, align 8
1395  %val = mul i64 %prev, 15
1396  store atomic i64 %val, ptr %p unordered, align 8
1397  ret void
1398}
1399
1400; Legal to fold (TODO)
1401define void @rmw_fold_mul2(ptr %p, i64 %v) {
1402; CHECK-O0-LABEL: rmw_fold_mul2:
1403; CHECK-O0:       # %bb.0:
1404; CHECK-O0-NEXT:    movq (%rdi), %rax
1405; CHECK-O0-NEXT:    imulq %rsi, %rax
1406; CHECK-O0-NEXT:    movq %rax, (%rdi)
1407; CHECK-O0-NEXT:    retq
1408;
1409; CHECK-O3-LABEL: rmw_fold_mul2:
1410; CHECK-O3:       # %bb.0:
1411; CHECK-O3-NEXT:    imulq (%rdi), %rsi
1412; CHECK-O3-NEXT:    movq %rsi, (%rdi)
1413; CHECK-O3-NEXT:    retq
1414  %prev = load atomic i64, ptr %p unordered, align 8
1415  %val = mul i64 %prev, %v
1416  store atomic i64 %val, ptr %p unordered, align 8
1417  ret void
1418}
1419
1420; Legal, as expected
1421define void @rmw_fold_sdiv1(ptr %p, i64 %v) {
1422; CHECK-O0-LABEL: rmw_fold_sdiv1:
1423; CHECK-O0:       # %bb.0:
1424; CHECK-O0-NEXT:    movq (%rdi), %rcx
1425; CHECK-O0-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
1426; CHECK-O0-NEXT:    movq %rcx, %rax
1427; CHECK-O0-NEXT:    imulq %rdx
1428; CHECK-O0-NEXT:    movq %rdx, %rax
1429; CHECK-O0-NEXT:    addq %rcx, %rax
1430; CHECK-O0-NEXT:    movq %rax, %rcx
1431; CHECK-O0-NEXT:    shrq $63, %rcx
1432; CHECK-O0-NEXT:    sarq $3, %rax
1433; CHECK-O0-NEXT:    addq %rcx, %rax
1434; CHECK-O0-NEXT:    movq %rax, (%rdi)
1435; CHECK-O0-NEXT:    retq
1436;
1437; CHECK-O3-LABEL: rmw_fold_sdiv1:
1438; CHECK-O3:       # %bb.0:
1439; CHECK-O3-NEXT:    movq (%rdi), %rcx
1440; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
1441; CHECK-O3-NEXT:    movq %rcx, %rax
1442; CHECK-O3-NEXT:    imulq %rdx
1443; CHECK-O3-NEXT:    addq %rcx, %rdx
1444; CHECK-O3-NEXT:    movq %rdx, %rax
1445; CHECK-O3-NEXT:    shrq $63, %rax
1446; CHECK-O3-NEXT:    sarq $3, %rdx
1447; CHECK-O3-NEXT:    addq %rax, %rdx
1448; CHECK-O3-NEXT:    movq %rdx, (%rdi)
1449; CHECK-O3-NEXT:    retq
1450  %prev = load atomic i64, ptr %p unordered, align 8
1451  %val = sdiv i64 %prev, 15
1452  store atomic i64 %val, ptr %p unordered, align 8
1453  ret void
1454}
1455
1456; Legal, as expected
1457define void @rmw_fold_sdiv2(ptr %p, i64 %v) {
1458; CHECK-O0-LABEL: rmw_fold_sdiv2:
1459; CHECK-O0:       # %bb.0:
1460; CHECK-O0-NEXT:    movq (%rdi), %rax
1461; CHECK-O0-NEXT:    cqto
1462; CHECK-O0-NEXT:    idivq %rsi
1463; CHECK-O0-NEXT:    movq %rax, (%rdi)
1464; CHECK-O0-NEXT:    retq
1465;
1466; CHECK-O3-LABEL: rmw_fold_sdiv2:
1467; CHECK-O3:       # %bb.0:
1468; CHECK-O3-NEXT:    movq (%rdi), %rax
1469; CHECK-O3-NEXT:    movq %rax, %rcx
1470; CHECK-O3-NEXT:    orq %rsi, %rcx
1471; CHECK-O3-NEXT:    shrq $32, %rcx
1472; CHECK-O3-NEXT:    je .LBB74_1
1473; CHECK-O3-NEXT:  # %bb.2:
1474; CHECK-O3-NEXT:    cqto
1475; CHECK-O3-NEXT:    idivq %rsi
1476; CHECK-O3-NEXT:    movq %rax, (%rdi)
1477; CHECK-O3-NEXT:    retq
1478; CHECK-O3-NEXT:  .LBB74_1:
1479; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
1480; CHECK-O3-NEXT:    xorl %edx, %edx
1481; CHECK-O3-NEXT:    divl %esi
1482; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
1483; CHECK-O3-NEXT:    movq %rax, (%rdi)
1484; CHECK-O3-NEXT:    retq
1485  %prev = load atomic i64, ptr %p unordered, align 8
1486  %val = sdiv i64 %prev, %v
1487  store atomic i64 %val, ptr %p unordered, align 8
1488  ret void
1489}
1490
1491; Legal, as expected
1492define void @rmw_fold_udiv1(ptr %p, i64 %v) {
1493; CHECK-LABEL: rmw_fold_udiv1:
1494; CHECK:       # %bb.0:
1495; CHECK-NEXT:    movq (%rdi), %rdx
1496; CHECK-NEXT:    movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
1497; CHECK-NEXT:    mulxq %rax, %rax, %rax
1498; CHECK-NEXT:    shrq $3, %rax
1499; CHECK-NEXT:    movq %rax, (%rdi)
1500; CHECK-NEXT:    retq
1501  %prev = load atomic i64, ptr %p unordered, align 8
1502  %val = udiv i64 %prev, 15
1503  store atomic i64 %val, ptr %p unordered, align 8
1504  ret void
1505}
1506
1507; Legal, as expected
1508define void @rmw_fold_udiv2(ptr %p, i64 %v) {
1509; CHECK-O0-LABEL: rmw_fold_udiv2:
1510; CHECK-O0:       # %bb.0:
1511; CHECK-O0-NEXT:    movq (%rdi), %rax
1512; CHECK-O0-NEXT:    xorl %ecx, %ecx
1513; CHECK-O0-NEXT:    movl %ecx, %edx
1514; CHECK-O0-NEXT:    divq %rsi
1515; CHECK-O0-NEXT:    movq %rax, (%rdi)
1516; CHECK-O0-NEXT:    retq
1517;
1518; CHECK-O3-LABEL: rmw_fold_udiv2:
1519; CHECK-O3:       # %bb.0:
1520; CHECK-O3-NEXT:    movq (%rdi), %rax
1521; CHECK-O3-NEXT:    movq %rax, %rcx
1522; CHECK-O3-NEXT:    orq %rsi, %rcx
1523; CHECK-O3-NEXT:    shrq $32, %rcx
1524; CHECK-O3-NEXT:    je .LBB76_1
1525; CHECK-O3-NEXT:  # %bb.2:
1526; CHECK-O3-NEXT:    xorl %edx, %edx
1527; CHECK-O3-NEXT:    divq %rsi
1528; CHECK-O3-NEXT:    movq %rax, (%rdi)
1529; CHECK-O3-NEXT:    retq
1530; CHECK-O3-NEXT:  .LBB76_1:
1531; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
1532; CHECK-O3-NEXT:    xorl %edx, %edx
1533; CHECK-O3-NEXT:    divl %esi
1534; CHECK-O3-NEXT:    # kill: def $eax killed $eax def $rax
1535; CHECK-O3-NEXT:    movq %rax, (%rdi)
1536; CHECK-O3-NEXT:    retq
1537  %prev = load atomic i64, ptr %p unordered, align 8
1538  %val = udiv i64 %prev, %v
1539  store atomic i64 %val, ptr %p unordered, align 8
1540  ret void
1541}
1542
1543; Legal, as expected
1544define void @rmw_fold_srem1(ptr %p, i64 %v) {
1545; CHECK-O0-LABEL: rmw_fold_srem1:
1546; CHECK-O0:       # %bb.0:
1547; CHECK-O0-NEXT:    movq (%rdi), %rax
1548; CHECK-O0-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
1549; CHECK-O0-NEXT:    movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
1550; CHECK-O0-NEXT:    imulq %rcx
1551; CHECK-O0-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
1552; CHECK-O0-NEXT:    movq %rdx, %rcx
1553; CHECK-O0-NEXT:    addq %rax, %rcx
1554; CHECK-O0-NEXT:    movq %rcx, %rdx
1555; CHECK-O0-NEXT:    shrq $63, %rdx
1556; CHECK-O0-NEXT:    sarq $3, %rcx
1557; CHECK-O0-NEXT:    addq %rdx, %rcx
1558; CHECK-O0-NEXT:    leaq (%rcx,%rcx,4), %rcx
1559; CHECK-O0-NEXT:    leaq (%rcx,%rcx,2), %rcx
1560; CHECK-O0-NEXT:    subq %rcx, %rax
1561; CHECK-O0-NEXT:    movq %rax, (%rdi)
1562; CHECK-O0-NEXT:    retq
1563;
1564; CHECK-O3-LABEL: rmw_fold_srem1:
1565; CHECK-O3:       # %bb.0:
1566; CHECK-O3-NEXT:    movq (%rdi), %rcx
1567; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889
1568; CHECK-O3-NEXT:    movq %rcx, %rax
1569; CHECK-O3-NEXT:    imulq %rdx
1570; CHECK-O3-NEXT:    addq %rcx, %rdx
1571; CHECK-O3-NEXT:    movq %rdx, %rax
1572; CHECK-O3-NEXT:    shrq $63, %rax
; CHECK-O3-NEXT:    sarq $3, %rdx
; CHECK-O3-NEXT:    addq %rax, %rdx
; CHECK-O3-NEXT:    leaq (%rdx,%rdx,4), %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT:    subq %rax, %rcx
; CHECK-O3-NEXT:    movq %rcx, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = srem i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_srem2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_srem2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    cqto
; CHECK-O0-NEXT:    idivq %rsi
; CHECK-O0-NEXT:    movq %rdx, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_srem2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB78_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    cqto
; CHECK-O3-NEXT:    idivq %rsi
; CHECK-O3-NEXT:    movq %rdx, (%rdi)
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB78_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    # kill: def $edx killed $edx def $rdx
; CHECK-O3-NEXT:    movq %rdx, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = srem i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_urem1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_urem1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889
; CHECK-O0-NEXT:    movq %rax, %rdx
; CHECK-O0-NEXT:    mulxq %rcx, %rcx, %rcx
; CHECK-O0-NEXT:    shrq $3, %rcx
; CHECK-O0-NEXT:    leaq (%rcx,%rcx,4), %rcx
; CHECK-O0-NEXT:    leaq (%rcx,%rcx,2), %rcx
; CHECK-O0-NEXT:    subq %rcx, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_urem1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rdx
; CHECK-O3-NEXT:    movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889
; CHECK-O3-NEXT:    mulxq %rax, %rax, %rax
; CHECK-O3-NEXT:    shrq $3, %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,4), %rax
; CHECK-O3-NEXT:    leaq (%rax,%rax,2), %rax
; CHECK-O3-NEXT:    subq %rax, %rdx
; CHECK-O3-NEXT:    movq %rdx, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = urem i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_urem2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_urem2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorl %ecx, %ecx
; CHECK-O0-NEXT:    movl %ecx, %edx
; CHECK-O0-NEXT:    divq %rsi
; CHECK-O0-NEXT:    movq %rdx, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_urem2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    orq %rsi, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    je .LBB80_1
; CHECK-O3-NEXT:  # %bb.2:
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divq %rsi
; CHECK-O3-NEXT:    movq %rdx, (%rdi)
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB80_1:
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    xorl %edx, %edx
; CHECK-O3-NEXT:    divl %esi
; CHECK-O3-NEXT:    # kill: def $edx killed $edx def $rdx
; CHECK-O3-NEXT:    movq %rdx, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = urem i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
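; If implemented, the fold could use a single in-memory shift; a plausible
; (hypothetical, not current) lowering:
;   shlq $15, (%rdi)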
define void @rmw_fold_shl1(ptr %p, i64 %v) {
; CHECK-LABEL: rmw_fold_shl1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    shlq $15, %rax
; CHECK-NEXT:    movq %rax, (%rdi)
; CHECK-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = shl i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
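; A variable-count in-memory shift needs the count in %cl; a plausible
; (hypothetical, not current) lowering:
;   movq %rsi, %rcx
;   shlq %cl, (%rdi)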
define void @rmw_fold_shl2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_shl2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movb %sil, %dl
; CHECK-O0-NEXT:    # implicit-def: $rcx
; CHECK-O0-NEXT:    movb %dl, %cl
; CHECK-O0-NEXT:    shlxq %rcx, %rax, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_shl2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    shlxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = shl i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
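; If implemented, the fold could use a single in-memory shift; a plausible
; (hypothetical, not current) lowering:
;   shrq $15, (%rdi)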
define void @rmw_fold_lshr1(ptr %p, i64 %v) {
; CHECK-LABEL: rmw_fold_lshr1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    shrq $15, %rax
; CHECK-NEXT:    movq %rax, (%rdi)
; CHECK-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = lshr i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
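; A variable-count in-memory shift needs the count in %cl; a plausible
; (hypothetical, not current) lowering:
;   movq %rsi, %rcx
;   shrq %cl, (%rdi)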
define void @rmw_fold_lshr2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_lshr2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movb %sil, %dl
; CHECK-O0-NEXT:    # implicit-def: $rcx
; CHECK-O0-NEXT:    movb %dl, %cl
; CHECK-O0-NEXT:    shrxq %rcx, %rax, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_lshr2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    shrxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = lshr i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
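; If implemented, the fold could use a single in-memory shift; a plausible
; (hypothetical, not current) lowering:
;   sarq $15, (%rdi)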
define void @rmw_fold_ashr1(ptr %p, i64 %v) {
; CHECK-LABEL: rmw_fold_ashr1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    sarq $15, %rax
; CHECK-NEXT:    movq %rax, (%rdi)
; CHECK-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = ashr i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal to fold (TODO)
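; A variable-count in-memory shift needs the count in %cl; a plausible
; (hypothetical, not current) lowering:
;   movq %rsi, %rcx
;   sarq %cl, (%rdi)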
define void @rmw_fold_ashr2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_ashr2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    movb %sil, %dl
; CHECK-O0-NEXT:    # implicit-def: $rcx
; CHECK-O0-NEXT:    movb %dl, %cl
; CHECK-O0-NEXT:    sarxq %rcx, %rax, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_ashr2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    sarxq %rsi, (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = ashr i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_and1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT:    andl $15, %eax
; CHECK-O0-NEXT:    # kill: def $rax killed $eax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_and1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    andq $15, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = and i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_and2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_and2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    andq %rsi, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_and2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    andq %rsi, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = and i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_or1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    orq $15, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_or1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    orq $15, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = or i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_or2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_or2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    orq %rsi, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_or2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    orq %rsi, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = or i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_xor1(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor1:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorq $15, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_xor1:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    xorq $15, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = xor i64 %prev, 15
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

; Legal, as expected
define void @rmw_fold_xor2(ptr %p, i64 %v) {
; CHECK-O0-LABEL: rmw_fold_xor2:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    xorq %rsi, %rax
; CHECK-O0-NEXT:    movq %rax, (%rdi)
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: rmw_fold_xor2:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    xorq %rsi, (%rdi)
; CHECK-O3-NEXT:    retq
  %prev = load atomic i64, ptr %p unordered, align 8
  %val = xor i64 %prev, %v
  store atomic i64 %val, ptr %p unordered, align 8
  ret void
}

;; The next batch of tests covers truncations, in combination with operations
;; which could be folded into the memory operation.

; Legal to reduce the load width (TODO)
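; On little-endian x86-64 the truncated value is just the low 32 bits, so a
; narrowed (hypothetical, not current) lowering could be:
;   movl (%rdi), %eax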
define i32 @fold_trunc(ptr %p) {
; CHECK-LABEL: fold_trunc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %ret = trunc i64 %v to i32
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
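; With the load narrowed and folded into the add, a plausible (hypothetical,
; not current) lowering:
;   movl %esi, %eax
;   addl (%rdi), %eax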
define i32 @fold_trunc_add(ptr %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_add:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT:    addl %esi, %eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: fold_trunc_add:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    addl %esi, %eax
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = add i32 %trunc, %v2
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
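; With the load narrowed and folded into the and, a plausible (hypothetical,
; not current) lowering:
;   movl %esi, %eax
;   andl (%rdi), %eax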
define i32 @fold_trunc_and(ptr %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_and:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT:    andl %esi, %eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: fold_trunc_and:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    andl %esi, %eax
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = and i32 %trunc, %v2
  ret i32 %ret
}

; Legal to reduce the load width and fold the load (TODO)
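; With the load narrowed and folded into the or, a plausible (hypothetical,
; not current) lowering:
;   movl %esi, %eax
;   orl (%rdi), %eax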
define i32 @fold_trunc_or(ptr %p, i32 %v2) {
; CHECK-O0-LABEL: fold_trunc_or:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rax
; CHECK-O0-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O0-NEXT:    orl %esi, %eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: fold_trunc_or:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    orl %esi, %eax
; CHECK-O3-NEXT:    # kill: def $eax killed $eax killed $rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %trunc = trunc i64 %v to i32
  %ret = or i32 %trunc, %v2
  ret i32 %ret
}

; It's tempting to split the wide load into two smaller byte loads
; to reduce memory traffic, but this would be illegal for an atomic load
define i32 @split_load(ptr %p) {
; CHECK-O0-LABEL: split_load:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq (%rdi), %rcx
; CHECK-O0-NEXT:    movb %cl, %al
; CHECK-O0-NEXT:    shrq $32, %rcx
; CHECK-O0-NEXT:    # kill: def $cl killed $cl killed $rcx
; CHECK-O0-NEXT:    orb %cl, %al
; CHECK-O0-NEXT:    movzbl %al, %eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: split_load:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq (%rdi), %rax
; CHECK-O3-NEXT:    movq %rax, %rcx
; CHECK-O3-NEXT:    shrq $32, %rcx
; CHECK-O3-NEXT:    orl %eax, %ecx
; CHECK-O3-NEXT:    movzbl %cl, %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %b1 = trunc i64 %v to i8
  %v.shift = lshr i64 %v, 32
  %b2 = trunc i64 %v.shift to i8
  %or = or i8 %b1, %b2
  %ret = zext i8 %or to i32
  ret i32 %ret
}

;; A collection of simple memory forwarding tests.  Nothing particularly
;; interesting semantics-wise; these just demonstrate obvious missed transforms.

@Zero = constant i64 0

; TODO: should return a constant
define i64 @constant_folding(ptr %p) {
; CHECK-LABEL: constant_folding:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  ret i64 %v
}

; Legal to forward and fold (TODO)
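; Forwarding the first load to the second makes the or a no-op (x | x == x),
; so this could (hypothetically) reduce to:
;   movq (%rdi), %rax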
define i64 @load_forwarding(ptr %p) {
; CHECK-LABEL: load_forwarding:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    orq (%rdi), %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  %v2 = load atomic i64, ptr %p unordered, align 8
  %ret = or i64 %v, %v2
  ret i64 %ret
}

; Legal to forward (TODO)
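; Forwarding the stored value would avoid the reload; a plausible
; (hypothetical, not current) lowering:
;   movq %rsi, (%rdi)
;   movq %rsi, %rax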
define i64 @store_forward(ptr %p, i64 %v) {
; CHECK-LABEL: store_forward:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, (%rdi)
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    retq
  store atomic i64 %v, ptr %p unordered, align 8
  %ret = load atomic i64, ptr %p unordered, align 8
  ret i64 %ret
}

; Legal to kill (TODO)
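; Dropping the no-op writeback would leave an unused unordered load, which
; could itself be deleted, (hypothetically) shrinking the body to:
;   retq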
define void @dead_writeback(ptr %p) {
; CHECK-LABEL: dead_writeback:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq %rax, (%rdi)
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  store atomic i64 %v, ptr %p unordered, align 8
  ret void
}

; Legal to kill (TODO)
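; With the first store dead, a plausible (hypothetical, not current) lowering:
;   movq %rsi, (%rdi)
;   retq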
define void @dead_store(ptr %p, i64 %v) {
; CHECK-LABEL: dead_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq $0, (%rdi)
; CHECK-NEXT:    movq %rsi, (%rdi)
; CHECK-NEXT:    retq
  store atomic i64 0, ptr %p unordered, align 8
  store atomic i64 %v, ptr %p unordered, align 8
  ret void
}

;; The next batch of tests ensures that we don't try to fold a load into a
;; use when the code motion implied by the folding is prevented by a fence.
;; Note: We're checking that the load doesn't get moved below the fence as
;; part of folding; it is, however, technically legal to hoist the add above
;; the fence.  If that were to happen, please rewrite the tests so that they
;; still verify the load is not sunk past the fence.

define i64 @nofold_fence(ptr %p) {
; CHECK-LABEL: nofold_fence:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    mfence
; CHECK-NEXT:    addq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  fence seq_cst
  %ret = add i64 %v, 15
  ret i64 %ret
}

define i64 @nofold_fence_acquire(ptr %p) {
; CHECK-LABEL: nofold_fence_acquire:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    #MEMBARRIER
; CHECK-NEXT:    addq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  fence acquire
  %ret = add i64 %v, 15
  ret i64 %ret
}


define i64 @nofold_stfence(ptr %p) {
; CHECK-LABEL: nofold_stfence:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    #MEMBARRIER
; CHECK-NEXT:    addq $15, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8
  fence syncscope("singlethread") seq_cst
  %ret = add i64 %v, 15
  ret i64 %ret
}

;; Next, test how well we can fold invariant loads.

@Constant = external dso_local constant i64

define i64 @fold_constant(i64 %arg) {
; CHECK-O0-LABEL: fold_constant:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movq %rdi, %rax
; CHECK-O0-NEXT:    addq Constant, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: fold_constant:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movq %rdi, %rax
; CHECK-O3-NEXT:    addq Constant(%rip), %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i64, ptr @Constant unordered, align 8
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_constant_clobber(ptr %p, i64 %arg) {
; CHECK-LABEL: fold_constant_clobber:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq Constant(%rip), %rax
; CHECK-NEXT:    movq $5, (%rdi)
; CHECK-NEXT:    addq %rsi, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr @Constant unordered, align 8
  store i64 5, ptr %p
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_constant_fence(i64 %arg) {
; CHECK-LABEL: fold_constant_fence:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq Constant(%rip), %rax
; CHECK-NEXT:    mfence
; CHECK-NEXT:    addq %rdi, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr @Constant unordered, align 8
  fence seq_cst
  %ret = add i64 %v, %arg
  ret i64 %ret
}

define i64 @fold_invariant_clobber(ptr dereferenceable(8) %p, i64 %arg) {
; CHECK-LABEL: fold_invariant_clobber:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq $5, (%rdi)
; CHECK-NEXT:    addq %rsi, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8, !invariant.load !{}
  store i64 5, ptr %p
  %ret = add i64 %v, %arg
  ret i64 %ret
}


define i64 @fold_invariant_fence(ptr dereferenceable(8) %p, i64 %arg) {
; CHECK-LABEL: fold_invariant_fence:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    mfence
; CHECK-NEXT:    addq %rsi, %rax
; CHECK-NEXT:    retq
  %v = load atomic i64, ptr %p unordered, align 8, !invariant.load !{}
  fence seq_cst
  %ret = add i64 %v, %arg
  ret i64 %ret
}


; Exercise a few cases involving any-extend (anyext) idioms

define i16 @load_i8_anyext_i16(ptr %ptr) {
; CHECK-O0-LABEL: load_i8_anyext_i16:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movb (%rdi), %al
; CHECK-O0-NEXT:    movzbl %al, %eax
; CHECK-O0-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i8_anyext_i16:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movzbl (%rdi), %eax
; CHECK-O3-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i8, ptr %ptr unordered, align 2
  %vec = insertelement <2 x i8> undef, i8 %v, i32 0
  %res = bitcast <2 x i8> %vec to i16
  ret i16 %res
}

define i32 @load_i8_anyext_i32(ptr %ptr) {
; CHECK-O0-LABEL: load_i8_anyext_i32:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movb (%rdi), %al
; CHECK-O0-NEXT:    movzbl %al, %eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i8_anyext_i32:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movzbl (%rdi), %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i8, ptr %ptr unordered, align 4
  %vec = insertelement <4 x i8> undef, i8 %v, i32 0
  %res = bitcast <4 x i8> %vec to i32
  ret i32 %res
}

define i32 @load_i16_anyext_i32(ptr %ptr) {
; CHECK-O0-LABEL: load_i16_anyext_i32:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movw (%rdi), %cx
; CHECK-O0-NEXT:    # implicit-def: $eax
; CHECK-O0-NEXT:    movw %cx, %ax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i16_anyext_i32:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movzwl (%rdi), %eax
; CHECK-O3-NEXT:    retq
  %v = load atomic i16, ptr %ptr unordered, align 4
  %vec = insertelement <2 x i16> undef, i16 %v, i64 0
  %res = bitcast <2 x i16> %vec to i32
  ret i32 %res
}

define i64 @load_i16_anyext_i64(ptr %ptr) {
; CHECK-O0-LABEL: load_i16_anyext_i64:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movw (%rdi), %cx
; CHECK-O0-NEXT:    # implicit-def: $eax
; CHECK-O0-NEXT:    movw %cx, %ax
; CHECK-O0-NEXT:    vmovd %eax, %xmm0
; CHECK-O0-NEXT:    vmovq %xmm0, %rax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_i16_anyext_i64:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movzwl (%rdi), %eax
; CHECK-O3-NEXT:    vmovd %eax, %xmm0
; CHECK-O3-NEXT:    vmovq %xmm0, %rax
; CHECK-O3-NEXT:    retq
  %v = load atomic i16, ptr %ptr unordered, align 8
  %vec = insertelement <4 x i16> undef, i16 %v, i64 0
  %res = bitcast <4 x i16> %vec to i64
  ret i64 %res
}

; TODO: Would be legal to combine these into a single wider load when the
; wider type is a legal atomic type
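; Since %p is 2-byte aligned here, the two i8 loads could (hypothetically)
; become a single i16 load:
;   movzwl (%rdi), %eax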
define i16 @load_combine(ptr %p) {
; CHECK-O0-LABEL: load_combine:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movb (%rdi), %al
; CHECK-O0-NEXT:    movb 1(%rdi), %cl
; CHECK-O0-NEXT:    movzbl %al, %eax
; CHECK-O0-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-O0-NEXT:    movzbl %cl, %ecx
; CHECK-O0-NEXT:    # kill: def $cx killed $cx killed $ecx
; CHECK-O0-NEXT:    shlw $8, %cx
; CHECK-O0-NEXT:    orw %cx, %ax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: load_combine:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movzbl (%rdi), %ecx
; CHECK-O3-NEXT:    movzbl 1(%rdi), %eax
; CHECK-O3-NEXT:    shll $8, %eax
; CHECK-O3-NEXT:    orl %ecx, %eax
; CHECK-O3-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-O3-NEXT:    retq
  %v1 = load atomic i8, ptr %p unordered, align 2
  %p2 = getelementptr i8, ptr %p, i64 1
  %v2 = load atomic i8, ptr %p2 unordered, align 1
  %v1.ext = zext i8 %v1 to i16
  %v2.ext = zext i8 %v2 to i16
  %v2.sht = shl i16 %v2.ext, 8
  %res = or i16 %v1.ext, %v2.sht
  ret i16 %res
}

define i1 @fold_cmp_over_fence(ptr %p, i32 %v1) {
; CHECK-O0-LABEL: fold_cmp_over_fence:
; CHECK-O0:       # %bb.0:
; CHECK-O0-NEXT:    movl (%rdi), %eax
; CHECK-O0-NEXT:    mfence
; CHECK-O0-NEXT:    cmpl %eax, %esi
; CHECK-O0-NEXT:    jne .LBB116_2
; CHECK-O0-NEXT:  # %bb.1: # %taken
; CHECK-O0-NEXT:    movb $1, %al
; CHECK-O0-NEXT:    retq
; CHECK-O0-NEXT:  .LBB116_2: # %untaken
; CHECK-O0-NEXT:    xorl %eax, %eax
; CHECK-O0-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-O0-NEXT:    retq
;
; CHECK-O3-LABEL: fold_cmp_over_fence:
; CHECK-O3:       # %bb.0:
; CHECK-O3-NEXT:    movl (%rdi), %eax
; CHECK-O3-NEXT:    mfence
; CHECK-O3-NEXT:    cmpl %eax, %esi
; CHECK-O3-NEXT:    jne .LBB116_2
; CHECK-O3-NEXT:  # %bb.1: # %taken
; CHECK-O3-NEXT:    movb $1, %al
; CHECK-O3-NEXT:    retq
; CHECK-O3-NEXT:  .LBB116_2: # %untaken
; CHECK-O3-NEXT:    xorl %eax, %eax
; CHECK-O3-NEXT:    retq
  %v2 = load atomic i32, ptr %p unordered, align 4
  fence seq_cst
  %cmp = icmp eq i32 %v1, %v2
  br i1 %cmp, label %taken, label %untaken
taken:
  ret i1 true
untaken:
  ret i1 false
}