; RUN: not --crash llc > /dev/null < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt
; RUN: llc < %s -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s
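
; The first RUN line checks that using atomic operations without
; -mattr=+atomics makes llc crash rather than silently miscompile; the second
; line runs the actual FileCheck tests with the atomics and sign-ext features
; enabled.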

; Test that atomic loads are assembled properly.

target triple = "wasm32-unknown-unknown"

;===----------------------------------------------------------------------------
; Atomic loads: 32-bit
;===----------------------------------------------------------------------------

; Basic load.

; CHECK-LABEL: load_i32_no_offset:
; CHECK: i32.atomic.load $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @load_i32_no_offset(ptr %p) {
  %v = load atomic i32, ptr %p seq_cst, align 4
  ret i32 %v
}
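
; A note on the folding tests below: a wasm load/store address is computed as
; an unsigned base address plus the instruction's unsigned memarg offset
; immediate, and out-of-bounds accesses trap rather than wrap. A constant
; added at the IR level can therefore only be moved into the offset immediate
; when the addition provably cannot wrap, which is what nuw adds and inbounds
; geps establish. As an illustrative sketch (hand-written, not compiler
; output), the folded form corresponds to wat like:
;
;   (i32.atomic.load offset=24 (local.get $p))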

; With an nuw add, we can fold an offset.

; CHECK-LABEL: load_i32_with_folded_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
define i32 @load_i32_with_folded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = load atomic i32, ptr %s seq_cst, align 4
  ret i32 %t
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: load_i32_with_folded_gep_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
define i32 @load_i32_with_folded_gep_offset(ptr %p) {
  %s = getelementptr inbounds i32, ptr %p, i32 6
  %t = load atomic i32, ptr %s seq_cst, align 4
  ret i32 %t
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: load_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_gep_negative_offset(ptr %p) {
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  %t = load atomic i32, ptr %s seq_cst, align 4
  ret i32 %t
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: load_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = load atomic i32, ptr %s seq_cst, align 4
  ret i32 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: load_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.load $push2=, 0($pop1){{$}}
define i32 @load_i32_with_unfolded_gep_offset(ptr %p) {
  %s = getelementptr i32, ptr %p, i32 6
  %t = load atomic i32, ptr %s seq_cst, align 4
  ret i32 %t
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: load_i32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load $push1=, 42($pop0){{$}}
define i32 @load_i32_from_numeric_address() {
  %s = inttoptr i32 42 to ptr
  %t = load atomic i32, ptr %s seq_cst, align 4
  ret i32 %t
}
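
; Here there is no base pointer at all, so the backend materializes a zero
; base and encodes the whole constant address in the offset immediate; the
; global-address case below works the same way, with the symbol's relocation
; emitted into the offset field.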

; CHECK-LABEL: load_i32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load $push1=, gv($pop0){{$}}
@gv = global i32 0
define i32 @load_i32_from_global_address() {
  %t = load atomic i32, ptr @gv seq_cst, align 4
  ret i32 %t
}

;===----------------------------------------------------------------------------
; Atomic loads: 64-bit
;===----------------------------------------------------------------------------

; Basic load.

; CHECK-LABEL: load_i64_no_offset:
; CHECK: i64.atomic.load $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @load_i64_no_offset(ptr %p) {
  %v = load atomic i64, ptr %p seq_cst, align 8
  ret i64 %v
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: load_i64_with_folded_offset:
; CHECK: i64.atomic.load $push0=, 24($0){{$}}
define i64 @load_i64_with_folded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = load atomic i64, ptr %s seq_cst, align 8
  ret i64 %t
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: load_i64_with_folded_gep_offset:
; CHECK: i64.atomic.load $push0=, 24($0){{$}}
define i64 @load_i64_with_folded_gep_offset(ptr %p) {
  %s = getelementptr inbounds i64, ptr %p, i32 3
  %t = load atomic i64, ptr %s seq_cst, align 8
  ret i64 %t
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: load_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_gep_negative_offset(ptr %p) {
  %s = getelementptr inbounds i64, ptr %p, i32 -3
  %t = load atomic i64, ptr %s seq_cst, align 8
  ret i64 %t
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: load_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = load atomic i64, ptr %s seq_cst, align 8
  ret i64 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: load_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.load $push2=, 0($pop1){{$}}
define i64 @load_i64_with_unfolded_gep_offset(ptr %p) {
  %s = getelementptr i64, ptr %p, i32 3
  %t = load atomic i64, ptr %s seq_cst, align 8
  ret i64 %t
}

;===----------------------------------------------------------------------------
; Atomic stores: 32-bit
;===----------------------------------------------------------------------------

; Basic store.

; CHECK-LABEL: store_i32_no_offset:
; CHECK-NEXT: .functype store_i32_no_offset (i32, i32) -> (){{$}}
; CHECK-NEXT: i32.atomic.store 0($0), $1{{$}}
; CHECK-NEXT: return{{$}}
define void @store_i32_no_offset(ptr %p, i32 %v) {
  store atomic i32 %v, ptr %p seq_cst, align 4
  ret void
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: store_i32_with_folded_offset:
; CHECK: i32.atomic.store 24($0), $pop0{{$}}
define void @store_i32_with_folded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  store atomic i32 0, ptr %s seq_cst, align 4
  ret void
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: store_i32_with_folded_gep_offset:
; CHECK: i32.atomic.store 24($0), $pop0{{$}}
define void @store_i32_with_folded_gep_offset(ptr %p) {
  %s = getelementptr inbounds i32, ptr %p, i32 6
  store atomic i32 0, ptr %s seq_cst, align 4
  ret void
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: store_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
define void @store_i32_with_unfolded_gep_negative_offset(ptr %p) {
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  store atomic i32 0, ptr %s seq_cst, align 4
  ret void
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: store_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
define void @store_i32_with_unfolded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  store atomic i32 0, ptr %s seq_cst, align 4
  ret void
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: store_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.store 0($pop1), $pop2{{$}}
define void @store_i32_with_unfolded_gep_offset(ptr %p) {
  %s = getelementptr i32, ptr %p, i32 6
  store atomic i32 0, ptr %s seq_cst, align 4
  ret void
}

; When storing to a fixed address, materialize a zero.

; CHECK-LABEL: store_i32_to_numeric_address:
; CHECK:      i32.const $push0=, 0{{$}}
; CHECK-NEXT: i32.const $push1=, 0{{$}}
; CHECK-NEXT: i32.atomic.store 42($pop0), $pop1{{$}}
define void @store_i32_to_numeric_address() {
  %s = inttoptr i32 42 to ptr
  store atomic i32 0, ptr %s seq_cst, align 4
  ret void
}

; CHECK-LABEL: store_i32_to_global_address:
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.const $push1=, 0{{$}}
; CHECK: i32.atomic.store gv($pop0), $pop1{{$}}
define void @store_i32_to_global_address() {
  store atomic i32 0, ptr @gv seq_cst, align 4
  ret void
}

;===----------------------------------------------------------------------------
; Atomic stores: 64-bit
;===----------------------------------------------------------------------------

; Basic store.

; CHECK-LABEL: store_i64_no_offset:
; CHECK-NEXT: .functype store_i64_no_offset (i32, i64) -> (){{$}}
; CHECK-NEXT: i64.atomic.store 0($0), $1{{$}}
; CHECK-NEXT: return{{$}}
define void @store_i64_no_offset(ptr %p, i64 %v) {
  store atomic i64 %v, ptr %p seq_cst, align 8
  ret void
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: store_i64_with_folded_offset:
; CHECK: i64.atomic.store 24($0), $pop0{{$}}
define void @store_i64_with_folded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  store atomic i64 0, ptr %s seq_cst, align 8
  ret void
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: store_i64_with_folded_gep_offset:
; CHECK: i64.atomic.store 24($0), $pop0{{$}}
define void @store_i64_with_folded_gep_offset(ptr %p) {
  %s = getelementptr inbounds i64, ptr %p, i32 3
  store atomic i64 0, ptr %s seq_cst, align 8
  ret void
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: store_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
define void @store_i64_with_unfolded_gep_negative_offset(ptr %p) {
  %s = getelementptr inbounds i64, ptr %p, i32 -3
  store atomic i64 0, ptr %s seq_cst, align 8
  ret void
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: store_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
define void @store_i64_with_unfolded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  store atomic i64 0, ptr %s seq_cst, align 8
  ret void
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: store_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.store 0($pop1), $pop2{{$}}
define void @store_i64_with_unfolded_gep_offset(ptr %p) {
  %s = getelementptr i64, ptr %p, i32 3
  store atomic i64 0, ptr %s seq_cst, align 8
  ret void
}

;===----------------------------------------------------------------------------
; Atomic sign-extending loads
;===----------------------------------------------------------------------------

; Fold an offset into a sign-extending load.

; CHECK-LABEL: load_i8_i32_s_with_folded_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @load_i8_i32_s_with_folded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = load atomic i8, ptr %s seq_cst, align 1
  %u = sext i8 %t to i32
  ret i32 %u
}
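
; wasm's atomic loads only come in zero-extending (_u) forms; there is no
; sign-extending atomic load. A sign-extending atomic load is therefore
; selected as a zero-extending load plus a separate extend instruction, which
; is why the RUN line enables +sign-ext (for extend8_s/extend16_s).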

; 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s
; CHECK-LABEL: load_i32_i64_s_with_folded_offset:
; CHECK: i32.atomic.load $push0=, 24($0){{$}}
; CHECK-NEXT: i64.extend_i32_s $push1=, $pop0{{$}}
define i64 @load_i32_i64_s_with_folded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = load atomic i32, ptr %s seq_cst, align 4
  %u = sext i32 %t to i64
  ret i64 %u
}

; Fold a gep offset into a sign-extending load.

; CHECK-LABEL: load_i8_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @load_i8_i32_s_with_folded_gep_offset(ptr %p) {
  %s = getelementptr inbounds i8, ptr %p, i32 24
  %t = load atomic i8, ptr %s seq_cst, align 1
  %u = sext i8 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}}
; CHECK-NEXT: i32.extend16_s $push1=, $pop0
define i32 @load_i16_i32_s_with_folded_gep_offset(ptr %p) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %t = load atomic i16, ptr %s seq_cst, align 2
  %u = sext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i64_s_with_folded_gep_offset:
; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}}
; CHECK-NEXT: i64.extend16_s $push1=, $pop0
define i64 @load_i16_i64_s_with_folded_gep_offset(ptr %p) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %t = load atomic i16, ptr %s seq_cst, align 2
  %u = sext i16 %t to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: load_i8_i32_s_with_folded_or_offset:
; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i32 @load_i8_i32_s_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %t1 = load atomic i8, ptr %arrayidx seq_cst, align 1
  %conv = sext i8 %t1 to i32
  ret i32 %conv
}
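
; Concretely: %and has its low two bits known to be zero, so
; "or i32 %and, 2" computes the same address as "add i32 %and, 2", and the
; backend can still fold the constant 2 into the offset immediate.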

; CHECK-LABEL: load_i8_i64_s_with_folded_or_offset:
; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i64 @load_i8_i64_s_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %t1 = load atomic i8, ptr %arrayidx seq_cst, align 1
  %conv = sext i8 %t1 to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: load_i16_i32_s_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}}
; CHECK-NEXT: i32.extend16_s $push2=, $pop1
define i32 @load_i16_i32_s_from_numeric_address() {
  %s = inttoptr i32 42 to ptr
  %t = load atomic i16, ptr %s seq_cst, align 2
  %u = sext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i8_i32_s_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}}
; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
@gv8 = global i8 0
define i32 @load_i8_i32_s_from_global_address() {
  %t = load atomic i8, ptr @gv8 seq_cst, align 1
  %u = sext i8 %t to i32
  ret i32 %u
}

;===----------------------------------------------------------------------------
; Atomic zero-extending loads
;===----------------------------------------------------------------------------

; Fold an offset into a zero-extending load.

; CHECK-LABEL: load_i8_i32_z_with_folded_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
define i32 @load_i8_i32_z_with_folded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = load atomic i8, ptr %s seq_cst, align 1
  %u = zext i8 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i32_i64_z_with_folded_offset:
; CHECK: i64.atomic.load32_u $push0=, 24($0){{$}}
define i64 @load_i32_i64_z_with_folded_offset(ptr %p) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = load atomic i32, ptr %s seq_cst, align 4
  %u = zext i32 %t to i64
  ret i64 %u
}

; Fold a gep offset into a zero-extending load.

; CHECK-LABEL: load_i8_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.load8_u $push0=, 24($0){{$}}
define i32 @load_i8_i32_z_with_folded_gep_offset(ptr %p) {
  %s = getelementptr inbounds i8, ptr %p, i32 24
  %t = load atomic i8, ptr %s seq_cst, align 1
  %u = zext i8 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.load16_u $push0=, 48($0){{$}}
define i32 @load_i16_i32_z_with_folded_gep_offset(ptr %p) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %t = load atomic i16, ptr %s seq_cst, align 2
  %u = zext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i16_i64_z_with_folded_gep_offset:
; CHECK: i64.atomic.load16_u $push0=, 48($0){{$}}
define i64 @load_i16_i64_z_with_folded_gep_offset(ptr %p) {
  %s = getelementptr inbounds i16, ptr %p, i64 24
  %t = load atomic i16, ptr %s seq_cst, align 2
  %u = zext i16 %t to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: load_i8_i32_z_with_folded_or_offset:
; CHECK: i32.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
define i32 @load_i8_i32_z_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %t1 = load atomic i8, ptr %arrayidx seq_cst, align 1
  %conv = zext i8 %t1 to i32
  ret i32 %conv
}

; CHECK-LABEL: load_i8_i64_z_with_folded_or_offset:
; CHECK: i64.atomic.load8_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}){{$}}
define i64 @load_i8_i64_z_with_folded_or_offset(i32 %x) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %t1 = load atomic i8, ptr %arrayidx seq_cst, align 1
  %conv = zext i8 %t1 to i64
  ret i64 %conv
}

; When loading from a fixed address, materialize a zero.

; CHECK-LABEL: load_i16_i32_z_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load16_u $push1=, 42($pop0){{$}}
define i32 @load_i16_i32_z_from_numeric_address() {
  %s = inttoptr i32 42 to ptr
  %t = load atomic i16, ptr %s seq_cst, align 2
  %u = zext i16 %t to i32
  ret i32 %u
}

; CHECK-LABEL: load_i8_i32_z_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.load8_u $push1=, gv8($pop0){{$}}
define i32 @load_i8_i32_z_from_global_address() {
  %t = load atomic i8, ptr @gv8 seq_cst, align 1
  %u = zext i8 %t to i32
  ret i32 %u
}

; An i8 return value should test anyext loads.

; CHECK-LABEL: load_i8_i32_retvalue:
; CHECK: i32.atomic.load8_u $push0=, 0($0){{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8 @load_i8_i32_retvalue(ptr %p) {
  %v = load atomic i8, ptr %p seq_cst, align 1
  ret i8 %v
}
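
; Returning i8 only requires the low 8 bits of the i32 result to be valid
; (anyext), so a plain load8_u with no extend instruction is sufficient.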

;===----------------------------------------------------------------------------
; Atomic truncating stores
;===----------------------------------------------------------------------------

; Fold an offset into a truncating store.

; CHECK-LABEL: store_i8_i32_with_folded_offset:
; CHECK: i32.atomic.store8 24($0), $1{{$}}
define void @store_i8_i32_with_folded_offset(ptr %p, i32 %v) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = trunc i32 %v to i8
  store atomic i8 %t, ptr %s seq_cst, align 1
  ret void
}
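
; i32.atomic.store8 stores only the low 8 bits of its operand, so the
; explicit trunc is folded into the store and no separate instruction is
; needed for it.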

; CHECK-LABEL: store_i32_i64_with_folded_offset:
; CHECK: i64.atomic.store32 24($0), $1{{$}}
define void @store_i32_i64_with_folded_offset(ptr %p, i64 %v) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = trunc i64 %v to i32
  store atomic i32 %t, ptr %s seq_cst, align 4
  ret void
}

; Fold a gep offset into a truncating store.

; CHECK-LABEL: store_i8_i32_with_folded_gep_offset:
; CHECK: i32.atomic.store8 24($0), $1{{$}}
define void @store_i8_i32_with_folded_gep_offset(ptr %p, i32 %v) {
  %s = getelementptr inbounds i8, ptr %p, i32 24
  %t = trunc i32 %v to i8
  store atomic i8 %t, ptr %s seq_cst, align 1
  ret void
}

; CHECK-LABEL: store_i16_i32_with_folded_gep_offset:
; CHECK: i32.atomic.store16 48($0), $1{{$}}
define void @store_i16_i32_with_folded_gep_offset(ptr %p, i32 %v) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %t = trunc i32 %v to i16
  store atomic i16 %t, ptr %s seq_cst, align 2
  ret void
}

; CHECK-LABEL: store_i16_i64_with_folded_gep_offset:
; CHECK: i64.atomic.store16 48($0), $1{{$}}
define void @store_i16_i64_with_folded_gep_offset(ptr %p, i64 %v) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %t = trunc i64 %v to i16
  store atomic i16 %t, ptr %s seq_cst, align 2
  ret void
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: store_i8_i32_with_folded_or_offset:
; CHECK: i32.atomic.store8 2($pop{{[0-9]+}}), $1{{$}}
define void @store_i8_i32_with_folded_or_offset(i32 %x, i32 %v) {
  %and = and i32 %x, -4
  %p = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %p, i32 2
  %t = trunc i32 %v to i8
  store atomic i8 %t, ptr %arrayidx seq_cst, align 1
  ret void
}

; CHECK-LABEL: store_i8_i64_with_folded_or_offset:
; CHECK: i64.atomic.store8 2($pop{{[0-9]+}}), $1{{$}}
define void @store_i8_i64_with_folded_or_offset(i32 %x, i64 %v) {
  %and = and i32 %x, -4
  %p = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %p, i32 2
  %t = trunc i64 %v to i8
  store atomic i8 %t, ptr %arrayidx seq_cst, align 1
  ret void
}

;===----------------------------------------------------------------------------
; Atomic binary read-modify-writes: 32-bit
;===----------------------------------------------------------------------------

; There are several RMW instructions, but here we only test 'add' as an example.

; Basic RMW.

; CHECK-LABEL: rmw_add_i32_no_offset:
; CHECK-NEXT: .functype rmw_add_i32_no_offset (i32, i32) -> (i32){{$}}
; CHECK: i32.atomic.rmw.add $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @rmw_add_i32_no_offset(ptr %p, i32 %v) {
  %old = atomicrmw add ptr %p, i32 %v seq_cst
  ret i32 %old
}
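
; An atomicrmw returns the value the memory held before the operation, which
; maps directly onto the result of the wasm RMW instruction. wasm currently
; provides only sequentially consistent atomics, so seq_cst is the ordering
; used throughout this file.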

; With an nuw add, we can fold an offset.

; CHECK-LABEL: rmw_add_i32_with_folded_offset:
; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i32_with_folded_offset(ptr %p, i32 %v) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %old = atomicrmw add ptr %s, i32 %v seq_cst
  ret i32 %old
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: rmw_add_i32_with_folded_gep_offset:
; CHECK: i32.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i32_with_folded_gep_offset(ptr %p, i32 %v) {
  %s = getelementptr inbounds i32, ptr %p, i32 6
  %old = atomicrmw add ptr %s, i32 %v seq_cst
  ret i32 %old
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i32 @rmw_add_i32_with_unfolded_gep_negative_offset(ptr %p, i32 %v) {
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  %old = atomicrmw add ptr %s, i32 %v seq_cst
  ret i32 %old
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: rmw_add_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i32 @rmw_add_i32_with_unfolded_offset(ptr %p, i32 %v) {
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %old = atomicrmw add ptr %s, i32 %v seq_cst
  ret i32 %old
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: rmw_add_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i32 @rmw_add_i32_with_unfolded_gep_offset(ptr %p, i32 %v) {
  %s = getelementptr i32, ptr %p, i32 6
  %old = atomicrmw add ptr %s, i32 %v seq_cst
  ret i32 %old
}

; When operating on a fixed address, materialize a zero.

; CHECK-LABEL: rmw_add_i32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.add $push1=, 42($pop0), $0{{$}}
define i32 @rmw_add_i32_from_numeric_address(i32 %v) {
  %s = inttoptr i32 42 to ptr
  %old = atomicrmw add ptr %s, i32 %v seq_cst
  ret i32 %old
}

; CHECK-LABEL: rmw_add_i32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.add $push1=, gv($pop0), $0{{$}}
define i32 @rmw_add_i32_from_global_address(i32 %v) {
  %old = atomicrmw add ptr @gv, i32 %v seq_cst
  ret i32 %old
}

;===----------------------------------------------------------------------------
; Atomic binary read-modify-writes: 64-bit
;===----------------------------------------------------------------------------

; Basic RMW.

; CHECK-LABEL: rmw_add_i64_no_offset:
; CHECK-NEXT: .functype rmw_add_i64_no_offset (i32, i64) -> (i64){{$}}
; CHECK: i64.atomic.rmw.add $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @rmw_add_i64_no_offset(ptr %p, i64 %v) {
  %old = atomicrmw add ptr %p, i64 %v seq_cst
  ret i64 %old
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: rmw_add_i64_with_folded_offset:
; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i64 @rmw_add_i64_with_folded_offset(ptr %p, i64 %v) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %old = atomicrmw add ptr %s, i64 %v seq_cst
  ret i64 %old
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: rmw_add_i64_with_folded_gep_offset:
; CHECK: i64.atomic.rmw.add $push0=, 24($0), $1{{$}}
define i64 @rmw_add_i64_with_folded_gep_offset(ptr %p, i64 %v) {
  %s = getelementptr inbounds i64, ptr %p, i32 3
  %old = atomicrmw add ptr %s, i64 %v seq_cst
  ret i64 %old
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i64 @rmw_add_i64_with_unfolded_gep_negative_offset(ptr %p, i64 %v) {
  %s = getelementptr inbounds i64, ptr %p, i32 -3
  %old = atomicrmw add ptr %s, i64 %v seq_cst
  ret i64 %old
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: rmw_add_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i64 @rmw_add_i64_with_unfolded_offset(ptr %p, i64 %v) {
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %old = atomicrmw add ptr %s, i64 %v seq_cst
  ret i64 %old
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: rmw_add_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.add $push2=, 0($pop1), $1{{$}}
define i64 @rmw_add_i64_with_unfolded_gep_offset(ptr %p, i64 %v) {
  %s = getelementptr i64, ptr %p, i32 3
  %old = atomicrmw add ptr %s, i64 %v seq_cst
  ret i64 %old
}

;===----------------------------------------------------------------------------
; Atomic truncating & sign-extending binary RMWs
;===----------------------------------------------------------------------------

; Fold an offset into a sign-extending rmw.

; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_offset:
; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @rmw_add_i8_i32_s_with_folded_offset(ptr %p, i32 %v) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = trunc i32 %v to i8
  %old = atomicrmw add ptr %s, i8 %t seq_cst
  %u = sext i8 %old to i32
  ret i32 %u
}
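
; As with loads, sub-word atomic RMWs exist only in zero-extending (_u)
; forms, so the sign-extending tests pair an rmw8/rmw16 _u operation with a
; separate extend instruction.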

; 32->64 sext rmw gets selected as i32.atomic.rmw.add, i64.extend_i32_s
; CHECK-LABEL: rmw_add_i32_i64_s_with_folded_offset:
; CHECK: i32.wrap_i64 $push0=, $1
; CHECK-NEXT: i32.atomic.rmw.add $push1=, 24($0), $pop0{{$}}
; CHECK-NEXT: i64.extend_i32_s $push2=, $pop1{{$}}
define i64 @rmw_add_i32_i64_s_with_folded_offset(ptr %p, i64 %v) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = trunc i64 %v to i32
  %old = atomicrmw add ptr %s, i32 %t seq_cst
  %u = sext i32 %old to i64
  ret i64 %u
}

; Fold a gep offset into a sign-extending rmw.

; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @rmw_add_i8_i32_s_with_folded_gep_offset(ptr %p, i32 %v) {
  %s = getelementptr inbounds i8, ptr %p, i32 24
  %t = trunc i32 %v to i8
  %old = atomicrmw add ptr %s, i8 %t seq_cst
  %u = sext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
; CHECK-NEXT: i32.extend16_s $push1=, $pop0
define i32 @rmw_add_i16_i32_s_with_folded_gep_offset(ptr %p, i32 %v) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %t = trunc i32 %v to i16
  %old = atomicrmw add ptr %s, i16 %t seq_cst
  %u = sext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i64_s_with_folded_gep_offset:
; CHECK: i64.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
; CHECK-NEXT: i64.extend16_s $push1=, $pop0
define i64 @rmw_add_i16_i64_s_with_folded_gep_offset(ptr %p, i64 %v) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %t = trunc i64 %v to i16
  %old = atomicrmw add ptr %s, i16 %t seq_cst
  %u = sext i16 %old to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: rmw_add_i8_i32_s_with_folded_or_offset:
; CHECK: i32.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i32 @rmw_add_i8_i32_s_with_folded_or_offset(i32 %x, i32 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %t = trunc i32 %v to i8
  %old = atomicrmw add ptr %arrayidx, i8 %t seq_cst
  %conv = sext i8 %old to i32
  ret i32 %conv
}

; CHECK-LABEL: rmw_add_i8_i64_s_with_folded_or_offset:
; CHECK: i64.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i64 @rmw_add_i8_i64_s_with_folded_or_offset(i32 %x, i64 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %t = trunc i64 %v to i8
  %old = atomicrmw add ptr %arrayidx, i8 %t seq_cst
  %conv = sext i8 %old to i64
  ret i64 %conv
}

; When operating on a fixed address, materialize a zero.

; CHECK-LABEL: rmw_add_i16_i32_s_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.add_u $push1=, 42($pop0), $0{{$}}
; CHECK-NEXT: i32.extend16_s $push2=, $pop1
define i32 @rmw_add_i16_i32_s_from_numeric_address(i32 %v) {
  %s = inttoptr i32 42 to ptr
  %t = trunc i32 %v to i16
  %old = atomicrmw add ptr %s, i16 %t seq_cst
  %u = sext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i8_i32_s_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.add_u $push1=, gv8($pop0), $0{{$}}
; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
define i32 @rmw_add_i8_i32_s_from_global_address(i32 %v) {
  %t = trunc i32 %v to i8
  %old = atomicrmw add ptr @gv8, i8 %t seq_cst
  %u = sext i8 %old to i32
  ret i32 %u
}

;===----------------------------------------------------------------------------
; Atomic truncating & zero-extending binary RMWs
;===----------------------------------------------------------------------------

; Fold an offset into a zero-extending rmw.

; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_offset:
; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_offset(ptr %p, i32 %v) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = trunc i32 %v to i8
  %old = atomicrmw add ptr %s, i8 %t seq_cst
  %u = zext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i32_i64_z_with_folded_offset:
; CHECK: i64.atomic.rmw32.add_u $push0=, 24($0), $1{{$}}
define i64 @rmw_add_i32_i64_z_with_folded_offset(ptr %p, i64 %v) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = trunc i64 %v to i32
  %old = atomicrmw add ptr %s, i32 %t seq_cst
  %u = zext i32 %old to i64
  ret i64 %u
}

; Fold a gep offset into a zero-extending rmw.

; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.rmw8.add_u $push0=, 24($0), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_gep_offset(ptr %p, i32 %v) {
  %s = getelementptr inbounds i8, ptr %p, i32 24
  %t = trunc i32 %v to i8
  %old = atomicrmw add ptr %s, i8 %t seq_cst
  %u = zext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
define i32 @rmw_add_i16_i32_z_with_folded_gep_offset(ptr %p, i32 %v) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %t = trunc i32 %v to i16
  %old = atomicrmw add ptr %s, i16 %t seq_cst
  %u = zext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i16_i64_z_with_folded_gep_offset:
; CHECK: i64.atomic.rmw16.add_u $push0=, 48($0), $1{{$}}
define i64 @rmw_add_i16_i64_z_with_folded_gep_offset(ptr %p, i64 %v) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %t = trunc i64 %v to i16
  %old = atomicrmw add ptr %s, i16 %t seq_cst
  %u = zext i16 %old to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: rmw_add_i8_i32_z_with_folded_or_offset:
; CHECK: i32.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
define i32 @rmw_add_i8_i32_z_with_folded_or_offset(i32 %x, i32 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %t = trunc i32 %v to i8
  %old = atomicrmw add ptr %arrayidx, i8 %t seq_cst
  %conv = zext i8 %old to i32
  ret i32 %conv
}

; CHECK-LABEL: rmw_add_i8_i64_z_with_folded_or_offset:
; CHECK: i64.atomic.rmw8.add_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1{{$}}
define i64 @rmw_add_i8_i64_z_with_folded_or_offset(i32 %x, i64 %v) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %t = trunc i64 %v to i8
  %old = atomicrmw add ptr %arrayidx, i8 %t seq_cst
  %conv = zext i8 %old to i64
  ret i64 %conv
}

; When operating on a fixed address, materialize a zero.

; CHECK-LABEL: rmw_add_i16_i32_z_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.add_u $push1=, 42($pop0), $0{{$}}
define i32 @rmw_add_i16_i32_z_from_numeric_address(i32 %v) {
  %s = inttoptr i32 42 to ptr
  %t = trunc i32 %v to i16
  %old = atomicrmw add ptr %s, i16 %t seq_cst
  %u = zext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: rmw_add_i8_i32_z_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.add_u $push1=, gv8($pop0), $0{{$}}
define i32 @rmw_add_i8_i32_z_from_global_address(i32 %v) {
  %t = trunc i32 %v to i8
  %old = atomicrmw add ptr @gv8, i8 %t seq_cst
  %u = zext i8 %old to i32
  ret i32 %u
}

; An i8 return value should test anyext RMWs.

; CHECK-LABEL: rmw_add_i8_i32_retvalue:
; CHECK: i32.atomic.rmw8.add_u $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i8 @rmw_add_i8_i32_retvalue(ptr %p, i32 %v) {
  %t = trunc i32 %v to i8
  %old = atomicrmw add ptr %p, i8 %t seq_cst
  ret i8 %old
}

;===----------------------------------------------------------------------------
; Atomic ternary read-modify-writes: 32-bit
;===----------------------------------------------------------------------------

; Basic RMW.

; CHECK-LABEL: cmpxchg_i32_no_offset:
; CHECK-NEXT: .functype cmpxchg_i32_no_offset (i32, i32, i32) -> (i32){{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @cmpxchg_i32_no_offset(ptr %p, i32 %exp, i32 %new) {
  %pair = cmpxchg ptr %p, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}
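
; The wasm cmpxchg instruction returns only the value read from memory. When
; IR also consumes the i1 success flag, it has to be recomputed with a
; compare, but these tests extract just the old value, so a lone cmpxchg
; instruction suffices.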

; With an nuw add, we can fold an offset.

; CHECK-LABEL: cmpxchg_i32_with_folded_offset:
; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i32_with_folded_offset(ptr %p, i32 %exp, i32 %new) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: cmpxchg_i32_with_folded_gep_offset:
; CHECK: i32.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i32_with_folded_gep_offset(ptr %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i32, ptr %p, i32 6
  %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i32 @cmpxchg_i32_with_unfolded_gep_negative_offset(ptr %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: cmpxchg_i32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i32 @cmpxchg_i32_with_unfolded_offset(ptr %p, i32 %exp, i32 %new) {
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: cmpxchg_i32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i32 @cmpxchg_i32_with_unfolded_gep_offset(ptr %p, i32 %exp, i32 %new) {
  %s = getelementptr i32, ptr %p, i32 6
  %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; When operating on a fixed address, materialize a zero.

; CHECK-LABEL: cmpxchg_i32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push1=, 42($pop0), $0, $1{{$}}
define i32 @cmpxchg_i32_from_numeric_address(i32 %exp, i32 %new) {
  %s = inttoptr i32 42 to ptr
  %pair = cmpxchg ptr %s, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

; CHECK-LABEL: cmpxchg_i32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw.cmpxchg $push1=, gv($pop0), $0, $1{{$}}
define i32 @cmpxchg_i32_from_global_address(i32 %exp, i32 %new) {
  %pair = cmpxchg ptr @gv, i32 %exp, i32 %new seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}

;===----------------------------------------------------------------------------
; Atomic ternary read-modify-writes: 64-bit
;===----------------------------------------------------------------------------

; Basic RMW.

; CHECK-LABEL: cmpxchg_i64_no_offset:
; CHECK-NEXT: .functype cmpxchg_i64_no_offset (i32, i64, i64) -> (i64){{$}}
; CHECK: i64.atomic.rmw.cmpxchg $push0=, 0($0), $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i64 @cmpxchg_i64_no_offset(ptr %p, i64 %exp, i64 %new) {
  %pair = cmpxchg ptr %p, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: cmpxchg_i64_with_folded_offset:
; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i64 @cmpxchg_i64_with_folded_offset(ptr %p, i64 %exp, i64 %new) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %pair = cmpxchg ptr %s, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: cmpxchg_i64_with_folded_gep_offset:
; CHECK: i64.atomic.rmw.cmpxchg $push0=, 24($0), $1, $2{{$}}
define i64 @cmpxchg_i64_with_folded_gep_offset(ptr %p, i64 %exp, i64 %new) {
  %s = getelementptr inbounds i64, ptr %p, i32 3
  %pair = cmpxchg ptr %s, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i64 @cmpxchg_i64_with_unfolded_gep_negative_offset(ptr %p, i64 %exp, i64 %new) {
  %s = getelementptr inbounds i64, ptr %p, i32 -3
  %pair = cmpxchg ptr %s, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: cmpxchg_i64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i64 @cmpxchg_i64_with_unfolded_offset(ptr %p, i64 %exp, i64 %new) {
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %pair = cmpxchg ptr %s, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: cmpxchg_i64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: i64.atomic.rmw.cmpxchg $push2=, 0($pop1), $1, $2{{$}}
define i64 @cmpxchg_i64_with_unfolded_gep_offset(ptr %p, i64 %exp, i64 %new) {
  %s = getelementptr i64, ptr %p, i32 3
  %pair = cmpxchg ptr %s, i64 %exp, i64 %new seq_cst seq_cst
  %old = extractvalue { i64, i1 } %pair, 0
  ret i64 %old
}

;===----------------------------------------------------------------------------
; Atomic truncating & sign-extending ternary RMWs
;===----------------------------------------------------------------------------

; Fold an offset into a sign-extending rmw.

; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @cmpxchg_i8_i32_s_with_folded_offset(ptr %p, i32 %exp, i32 %new) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg ptr %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = sext i8 %old to i32
  ret i32 %u
}

; 32->64 sext rmw gets selected as i32.atomic.rmw.cmpxchg, i64.extend_i32_s
; CHECK-LABEL: cmpxchg_i32_i64_s_with_folded_offset:
; CHECK: i32.wrap_i64 $push1=, $1
; CHECK-NEXT: i32.wrap_i64 $push0=, $2
; CHECK-NEXT: i32.atomic.rmw.cmpxchg $push2=, 24($0), $pop1, $pop0{{$}}
; CHECK-NEXT: i64.extend_i32_s $push3=, $pop2{{$}}
define i64 @cmpxchg_i32_i64_s_with_folded_offset(ptr %p, i64 %exp, i64 %new) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %exp_t = trunc i64 %exp to i32
  %new_t = trunc i64 %new to i32
  %pair = cmpxchg ptr %s, i32 %exp_t, i32 %new_t seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  %u = sext i32 %old to i64
  ret i64 %u
}

; Fold a gep offset into a sign-extending rmw.

; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
; CHECK-NEXT: i32.extend8_s $push1=, $pop0
define i32 @cmpxchg_i8_i32_s_with_folded_gep_offset(ptr %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i8, ptr %p, i32 24
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg ptr %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = sext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i16_i32_s_with_folded_gep_offset:
; CHECK: i32.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
; CHECK-NEXT: i32.extend16_s $push1=, $pop0
define i32 @cmpxchg_i16_i32_s_with_folded_gep_offset(ptr %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %exp_t = trunc i32 %exp to i16
  %new_t = trunc i32 %new to i16
  %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = sext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i16_i64_s_with_folded_gep_offset:
; CHECK: i64.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
; CHECK-NEXT: i64.extend16_s $push1=, $pop0
define i64 @cmpxchg_i16_i64_s_with_folded_gep_offset(ptr %p, i64 %exp, i64 %new) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %exp_t = trunc i64 %exp to i16
  %new_t = trunc i64 %new to i16
  %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = sext i16 %old to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: cmpxchg_i8_i32_s_with_folded_or_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
; CHECK-NEXT: i32.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i32 @cmpxchg_i8_i32_s_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg ptr %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %conv = sext i8 %old to i32
  ret i32 %conv
}

; CHECK-LABEL: cmpxchg_i8_i64_s_with_folded_or_offset:
; CHECK: i64.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
; CHECK-NEXT: i64.extend8_s $push{{[0-9]+}}=, $pop[[R1]]{{$}}
define i64 @cmpxchg_i8_i64_s_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %exp_t = trunc i64 %exp to i8
  %new_t = trunc i64 %new to i8
  %pair = cmpxchg ptr %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %conv = sext i8 %old to i64
  ret i64 %conv
}

; When operating on a fixed address, materialize a zero.

; CHECK-LABEL: cmpxchg_i16_i32_s_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.cmpxchg_u $push1=, 42($pop0), $0, $1{{$}}
; CHECK-NEXT: i32.extend16_s $push2=, $pop1
define i32 @cmpxchg_i16_i32_s_from_numeric_address(i32 %exp, i32 %new) {
  %s = inttoptr i32 42 to ptr
  %exp_t = trunc i32 %exp to i16
  %new_t = trunc i32 %new to i16
  %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = sext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i8_i32_s_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.cmpxchg_u $push1=, gv8($pop0), $0, $1{{$}}
; CHECK-NEXT: i32.extend8_s $push2=, $pop1{{$}}
define i32 @cmpxchg_i8_i32_s_from_global_address(i32 %exp, i32 %new) {
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg ptr @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = sext i8 %old to i32
  ret i32 %u
}

;===----------------------------------------------------------------------------
; Atomic truncating & zero-extending ternary RMWs
;===----------------------------------------------------------------------------

; Fold an offset into a zero-extending rmw.

; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i8_i32_z_with_folded_offset(ptr %p, i32 %exp, i32 %new) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg ptr %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = zext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i32_i64_z_with_folded_offset:
; CHECK: i64.atomic.rmw32.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
define i64 @cmpxchg_i32_i64_z_with_folded_offset(ptr %p, i64 %exp, i64 %new) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %exp_t = trunc i64 %exp to i32
  %new_t = trunc i64 %new to i32
  %pair = cmpxchg ptr %s, i32 %exp_t, i32 %new_t seq_cst seq_cst
  %old = extractvalue { i32, i1 } %pair, 0
  %u = zext i32 %old to i64
  ret i64 %u
}

; Fold a gep offset into a zero-extending rmw.

; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push0=, 24($0), $1, $2{{$}}
define i32 @cmpxchg_i8_i32_z_with_folded_gep_offset(ptr %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i8, ptr %p, i32 24
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg ptr %s, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = zext i8 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i16_i32_z_with_folded_gep_offset:
; CHECK: i32.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
define i32 @cmpxchg_i16_i32_z_with_folded_gep_offset(ptr %p, i32 %exp, i32 %new) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %exp_t = trunc i32 %exp to i16
  %new_t = trunc i32 %new to i16
  %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = zext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i16_i64_z_with_folded_gep_offset:
; CHECK: i64.atomic.rmw16.cmpxchg_u $push0=, 48($0), $1, $2{{$}}
define i64 @cmpxchg_i16_i64_z_with_folded_gep_offset(ptr %p, i64 %exp, i64 %new) {
  %s = getelementptr inbounds i16, ptr %p, i32 24
  %exp_t = trunc i64 %exp to i16
  %new_t = trunc i64 %new to i16
  %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = zext i16 %old to i64
  ret i64 %u
}

; 'add' in this code becomes 'or' after DAG optimization. Treat an 'or' node as
; an 'add' if the or'ed bits are known to be zero.

; CHECK-LABEL: cmpxchg_i8_i32_z_with_folded_or_offset:
; CHECK: i32.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
define i32 @cmpxchg_i8_i32_z_with_folded_or_offset(i32 %x, i32 %exp, i32 %new) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg ptr %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %conv = zext i8 %old to i32
  ret i32 %conv
}

; CHECK-LABEL: cmpxchg_i8_i64_z_with_folded_or_offset:
; CHECK: i64.atomic.rmw8.cmpxchg_u $push[[R1:[0-9]+]]=, 2($pop{{[0-9]+}}), $1, $2{{$}}
define i64 @cmpxchg_i8_i64_z_with_folded_or_offset(i32 %x, i64 %exp, i64 %new) {
  %and = and i32 %x, -4
  %t0 = inttoptr i32 %and to ptr
  %arrayidx = getelementptr inbounds i8, ptr %t0, i32 2
  %exp_t = trunc i64 %exp to i8
  %new_t = trunc i64 %new to i8
  %pair = cmpxchg ptr %arrayidx, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %conv = zext i8 %old to i64
  ret i64 %conv
}

; When operating on a fixed address, materialize a zero.
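; The wasm addressing mode is always base+offset, so a constant address is
; encoded as a zero base plus the whole address in the offset immediate, as in
; 42($pop0) below.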

; CHECK-LABEL: cmpxchg_i16_i32_z_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw16.cmpxchg_u $push1=, 42($pop0), $0, $1{{$}}
define i32 @cmpxchg_i16_i32_z_from_numeric_address(i32 %exp, i32 %new) {
  %s = inttoptr i32 42 to ptr
  %exp_t = trunc i32 %exp to i16
  %new_t = trunc i32 %new to i16
  %pair = cmpxchg ptr %s, i16 %exp_t, i16 %new_t seq_cst seq_cst
  %old = extractvalue { i16, i1 } %pair, 0
  %u = zext i16 %old to i32
  ret i32 %u
}

; CHECK-LABEL: cmpxchg_i8_i32_z_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: i32.atomic.rmw8.cmpxchg_u $push1=, gv8($pop0), $0, $1{{$}}
define i32 @cmpxchg_i8_i32_z_from_global_address(i32 %exp, i32 %new) {
  %exp_t = trunc i32 %exp to i8
  %new_t = trunc i32 %new to i8
  %pair = cmpxchg ptr @gv8, i8 %exp_t, i8 %new_t seq_cst seq_cst
  %old = extractvalue { i8, i1 } %pair, 0
  %u = zext i8 %old to i32
  ret i32 %u
}

;===----------------------------------------------------------------------------
; Waits: 32-bit
;===----------------------------------------------------------------------------

declare i32 @llvm.wasm.memory.atomic.wait32(ptr, i32, i64)
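
; memory.atomic.wait32 blocks until the 32-bit value at the address no longer
; equals the expected operand or the timeout expires. The timeout is in
; nanoseconds, with a negative value meaning wait indefinitely; the result is
; 0 ("ok"), 1 ("not-equal"), or 2 ("timed-out"). A minimal illustrative call
; with an infinite timeout (for exposition only, not part of the test):
;
;   %r = call i32 @llvm.wasm.memory.atomic.wait32(ptr %p, i32 0, i64 -1)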

; Basic wait.

; CHECK-LABEL: wait32_no_offset:
; CHECK: memory.atomic.wait32 $push0=, 0($0), $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @wait32_no_offset(ptr %p, i32 %exp, i64 %timeout) {
  %v = call i32 @llvm.wasm.memory.atomic.wait32(ptr %p, i32 %exp, i64 %timeout)
  ret i32 %v
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: wait32_with_folded_offset:
; CHECK: memory.atomic.wait32 $push0=, 24($0), $1, $2{{$}}
define i32 @wait32_with_folded_offset(ptr %p, i32 %exp, i64 %timeout) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: wait32_with_folded_gep_offset:
; CHECK: memory.atomic.wait32 $push0=, 24($0), $1, $2{{$}}
define i32 @wait32_with_folded_gep_offset(ptr %p, i32 %exp, i64 %timeout) {
  %s = getelementptr inbounds i32, ptr %p, i32 6
  %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; We can't fold a negative offset though, even with an inbounds gep.
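; The offset field in the encoded instruction is an unsigned immediate, so a
; negative offset cannot be represented and has to be added to the base
; explicitly.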

; CHECK-LABEL: wait32_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait32_with_unfolded_gep_negative_offset(ptr %p, i32 %exp, i64 %timeout) {
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; Without nuw, and even with nsw, we can't fold an offset.
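; A wrapping 32-bit add can produce a low address, while the base+offset sum in
; the instruction is bounds-checked without wrapping, so folding would change
; behavior on overflow.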

; CHECK-LABEL: wait32_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait32_with_unfolded_offset(ptr %p, i32 %exp, i64 %timeout) {
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: wait32_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait32 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait32_with_unfolded_gep_offset(ptr %p, i32 %exp, i64 %timeout) {
  %s = getelementptr i32, ptr %p, i32 6
  %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; When waiting from a fixed address, materialize a zero.

; CHECK-LABEL: wait32_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.wait32 $push1=, 42($pop0), $0, $1{{$}}
define i32 @wait32_from_numeric_address(i32 %exp, i64 %timeout) {
  %s = inttoptr i32 42 to ptr
  %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr %s, i32 %exp, i64 %timeout)
  ret i32 %t
}

; CHECK-LABEL: wait32_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.wait32 $push1=, gv($pop0), $0, $1{{$}}
define i32 @wait32_from_global_address(i32 %exp, i64 %timeout) {
  %t = call i32 @llvm.wasm.memory.atomic.wait32(ptr @gv, i32 %exp, i64 %timeout)
  ret i32 %t
}

;===----------------------------------------------------------------------------
; Waits: 64-bit
;===----------------------------------------------------------------------------

declare i32 @llvm.wasm.memory.atomic.wait64(ptr, i64, i64)
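
; memory.atomic.wait64 behaves like wait32 but compares a 64-bit expected
; value. A minimal illustrative call (for exposition only, not part of the
; test):
;
;   %r = call i32 @llvm.wasm.memory.atomic.wait64(ptr %p, i64 0, i64 -1)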

; Basic wait.

; CHECK-LABEL: wait64_no_offset:
; CHECK: memory.atomic.wait64 $push0=, 0($0), $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @wait64_no_offset(ptr %p, i64 %exp, i64 %timeout) {
  %v = call i32 @llvm.wasm.memory.atomic.wait64(ptr %p, i64 %exp, i64 %timeout)
  ret i32 %v
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: wait64_with_folded_offset:
; CHECK: memory.atomic.wait64 $push0=, 24($0), $1, $2{{$}}
define i32 @wait64_with_folded_offset(ptr %p, i64 %exp, i64 %timeout) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = call i32 @llvm.wasm.memory.atomic.wait64(ptr %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: wait64_with_folded_gep_offset:
; CHECK: memory.atomic.wait64 $push0=, 24($0), $1, $2{{$}}
define i32 @wait64_with_folded_gep_offset(ptr %p, i64 %exp, i64 %timeout) {
  %s = getelementptr inbounds i64, ptr %p, i32 3
  %t = call i32 @llvm.wasm.memory.atomic.wait64(ptr %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: wait64_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait64_with_unfolded_gep_negative_offset(ptr %p, i64 %exp, i64 %timeout) {
  %s = getelementptr inbounds i64, ptr %p, i32 -3
  %t = call i32 @llvm.wasm.memory.atomic.wait64(ptr %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: wait64_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait64_with_unfolded_offset(ptr %p, i64 %exp, i64 %timeout) {
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = call i32 @llvm.wasm.memory.atomic.wait64(ptr %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: wait64_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.wait64 $push2=, 0($pop1), $1, $2{{$}}
define i32 @wait64_with_unfolded_gep_offset(ptr %p, i64 %exp, i64 %timeout) {
  %s = getelementptr i64, ptr %p, i32 3
  %t = call i32 @llvm.wasm.memory.atomic.wait64(ptr %s, i64 %exp, i64 %timeout)
  ret i32 %t
}

;===----------------------------------------------------------------------------
; Notifies
;===----------------------------------------------------------------------------

declare i32 @llvm.wasm.memory.atomic.notify(ptr, i32)
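
; memory.atomic.notify wakes at most the given count of threads waiting on the
; address and returns the number of waiters actually woken. A minimal
; illustrative call waking at most one waiter (for exposition only, not part
; of the test):
;
;   %woken = call i32 @llvm.wasm.memory.atomic.notify(ptr %p, i32 1)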

; Basic notify.

; CHECK-LABEL: notify_no_offset:
; CHECK: memory.atomic.notify $push0=, 0($0), $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define i32 @notify_no_offset(ptr %p, i32 %notify_count) {
  %v = call i32 @llvm.wasm.memory.atomic.notify(ptr %p, i32 %notify_count)
  ret i32 %v
}

; With an nuw add, we can fold an offset.

; CHECK-LABEL: notify_with_folded_offset:
; CHECK: memory.atomic.notify $push0=, 24($0), $1{{$}}
define i32 @notify_with_folded_offset(ptr %p, i32 %notify_count) {
  %q = ptrtoint ptr %p to i32
  %r = add nuw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count)
  ret i32 %t
}

; With an inbounds gep, we can fold an offset.

; CHECK-LABEL: notify_with_folded_gep_offset:
; CHECK: memory.atomic.notify $push0=, 24($0), $1{{$}}
define i32 @notify_with_folded_gep_offset(ptr %p, i32 %notify_count) {
  %s = getelementptr inbounds i32, ptr %p, i32 6
  %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count)
  ret i32 %t
}

; We can't fold a negative offset though, even with an inbounds gep.

; CHECK-LABEL: notify_with_unfolded_gep_negative_offset:
; CHECK: i32.const $push0=, -24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}}
define i32 @notify_with_unfolded_gep_negative_offset(ptr %p, i32 %notify_count) {
  %s = getelementptr inbounds i32, ptr %p, i32 -6
  %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count)
  ret i32 %t
}

; Without nuw, and even with nsw, we can't fold an offset.

; CHECK-LABEL: notify_with_unfolded_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}}
define i32 @notify_with_unfolded_offset(ptr %p, i32 %notify_count) {
  %q = ptrtoint ptr %p to i32
  %r = add nsw i32 %q, 24
  %s = inttoptr i32 %r to ptr
  %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count)
  ret i32 %t
}

; Without inbounds, we can't fold a gep offset.

; CHECK-LABEL: notify_with_unfolded_gep_offset:
; CHECK: i32.const $push0=, 24{{$}}
; CHECK: i32.add $push1=, $0, $pop0{{$}}
; CHECK: memory.atomic.notify $push2=, 0($pop1), $1{{$}}
define i32 @notify_with_unfolded_gep_offset(ptr %p, i32 %notify_count) {
  %s = getelementptr i32, ptr %p, i32 6
  %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count)
  ret i32 %t
}

; When notifying from a fixed address, materialize a zero.

; CHECK-LABEL: notify_from_numeric_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.notify $push1=, 42($pop0), $0{{$}}
define i32 @notify_from_numeric_address(i32 %notify_count) {
  %s = inttoptr i32 42 to ptr
  %t = call i32 @llvm.wasm.memory.atomic.notify(ptr %s, i32 %notify_count)
  ret i32 %t
}

; CHECK-LABEL: notify_from_global_address
; CHECK: i32.const $push0=, 0{{$}}
; CHECK: memory.atomic.notify $push1=, gv($pop0), $0{{$}}
define i32 @notify_from_global_address(i32 %notify_count) {
  %t = call i32 @llvm.wasm.memory.atomic.notify(ptr @gv, i32 %notify_count)
  ret i32 %t
}