xref: /llvm-project/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16-system.ll (revision 7927bcdb8a32646f78c01535050ada6ddc23f4f5)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand %s | FileCheck %s
3
; Module data layout. Relevant to the tests in this file: `p1:64:64` makes
; addrspace(1) pointers 64 bits wide, which is why the expected expansions
; use i64 for ptrtoint and for the llvm.ptrmask mask operand.
4target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
5
; i16 `atomicrmw xchg` on an addrspace(1) pointer with no explicit alignment
; and no syncscope. The expected output rewrites it as a 32-bit cmpxchg retry
; loop on the enclosing 4-byte-aligned word: the address is rounded down with
; llvm.ptrmask(-4), the low two address bits times 8 give the bit shift, and
; 65535 shifted by that amount is the lane mask. Each iteration clears the
; lane with the inverted mask and ORs in the shifted operand; the previous
; value is shifted back down and truncated to i16 for the return.
6define i16 @test_atomicrmw_xchg_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
7; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_system(
8; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
9; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
10; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
11; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
12; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
13; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
14; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
15; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
16; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
17; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
18; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
19; CHECK:       atomicrmw.start:
20; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
21; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
22; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
23; CHECK-NEXT:    [[TMP7:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] seq_cst seq_cst, align 4
24; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
25; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
26; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
27; CHECK:       atomicrmw.end:
28; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
29; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
30; CHECK-NEXT:    ret i16 [[EXTRACTED]]
31;
32  %res = atomicrmw xchg ptr addrspace(1) %ptr, i16 %value seq_cst
33  ret i16 %res
34}
35
; i16 `atomicrmw xchg` on an addrspace(1) pointer, this time with `align 4`.
; The expected expansion needs no ptrmask or shift computation: it operates
; on %ptr directly, clears the low 16 bits of the loaded word with the
; constant -65536, ORs in the zero-extended operand, and retries on a 32-bit
; cmpxchg. The return value is a plain trunc of the last loaded word.
36define i16 @test_atomicrmw_xchg_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
37; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_system_align4(
38; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
39; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
40; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
41; CHECK:       atomicrmw.start:
42; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
43; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[LOADED]], -65536
44; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP1]]
45; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP4]] seq_cst seq_cst, align 4
46; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
47; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
48; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
49; CHECK:       atomicrmw.end:
50; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
51; CHECK-NEXT:    ret i16 [[EXTRACTED]]
52;
53  %res = atomicrmw xchg ptr addrspace(1) %ptr, i16 %value seq_cst, align 4
54  ret i16 %res
55}
56
; i16 `atomicrmw add` on an addrspace(1) pointer with no explicit alignment.
; Expected expansion: compute the aligned word address, shift amount and lane
; mask as for the other unaligned cases, then loop on a 32-bit cmpxchg. Inside
; the loop the add is done on the full i32 with the shifted operand, the sum
; is masked back to the 16-bit lane, and the untouched bits of the loaded
; word (selected with the inverted mask) are ORed back in.
57define i16 @test_atomicrmw_add_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
58; CHECK-LABEL: @test_atomicrmw_add_i16_global_system(
59; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
60; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
61; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
62; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
63; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
64; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
65; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
66; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
67; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
68; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
69; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
70; CHECK:       atomicrmw.start:
71; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
72; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
73; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
74; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
75; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
76; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
77; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
78; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
79; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
80; CHECK:       atomicrmw.end:
81; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
82; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
83; CHECK-NEXT:    ret i16 [[EXTRACTED]]
84;
85  %res = atomicrmw add ptr addrspace(1) %ptr, i16 %value seq_cst
86  ret i16 %res
87}
88
; i16 `atomicrmw add` on an addrspace(1) pointer with `align 4`. The expected
; expansion works on %ptr directly with constant masks: the i32 sum is masked
; with 65535, the loaded word with -65536, and the two halves are ORed
; together before the 32-bit cmpxchg. No ptrmask or shift is emitted.
89define i16 @test_atomicrmw_add_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
90; CHECK-LABEL: @test_atomicrmw_add_i16_global_system_align4(
91; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
92; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
93; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
94; CHECK:       atomicrmw.start:
95; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
96; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
97; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[NEW]], 65535
98; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -65536
99; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
100; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
101; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
102; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
103; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
104; CHECK:       atomicrmw.end:
105; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
106; CHECK-NEXT:    ret i16 [[EXTRACTED]]
107;
108  %res = atomicrmw add ptr addrspace(1) %ptr, i16 %value seq_cst, align 4
109  ret i16 %res
110}
111
; i16 `atomicrmw sub` on an addrspace(1) pointer with no explicit alignment.
; The expected expansion has the same shape as the unaligned add case, with
; an i32 `sub` of the shifted operand inside the 32-bit cmpxchg retry loop;
; the difference is masked to the 16-bit lane and merged with the untouched
; bits of the loaded word.
112define i16 @test_atomicrmw_sub_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
113; CHECK-LABEL: @test_atomicrmw_sub_i16_global_system(
114; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
115; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
116; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
117; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
118; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
119; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
120; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
121; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
122; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
123; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
124; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
125; CHECK:       atomicrmw.start:
126; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
127; CHECK-NEXT:    [[NEW:%.*]] = sub i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
128; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
129; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
130; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
131; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
132; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
133; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
134; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
135; CHECK:       atomicrmw.end:
136; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
137; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
138; CHECK-NEXT:    ret i16 [[EXTRACTED]]
139;
140  %res = atomicrmw sub ptr addrspace(1) %ptr, i16 %value seq_cst
141  ret i16 %res
142}
143
; i16 `atomicrmw and` on an addrspace(1) pointer with no explicit alignment.
; Unlike the arithmetic cases, the expected output contains no cmpxchg loop:
; the operand is zero-extended, shifted into its lane, and ORed with the
; inverted lane mask so that every bit outside the lane is 1, then a single
; i32 `atomicrmw and` is issued on the aligned word. The old i16 value is
; recovered with lshr + trunc.
144define i16 @test_atomicrmw_and_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
145; CHECK-LABEL: @test_atomicrmw_and_i16_global_system(
146; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
147; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
148; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
149; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
150; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
151; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
152; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
153; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
154; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
155; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
156; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] seq_cst, align 4
157; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
158; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
159; CHECK-NEXT:    ret i16 [[EXTRACTED]]
160;
161  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value seq_cst
162  ret i16 %res
163}
164
; i16 `atomicrmw nand` on an addrspace(1) pointer with no explicit alignment.
; The expected output is a 32-bit cmpxchg retry loop: the loaded word is
; ANDed with the shifted operand, inverted with `xor ..., -1`, masked to the
; 16-bit lane, and merged with the untouched bits of the loaded word before
; the cmpxchg.
165define i16 @test_atomicrmw_nand_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
166; CHECK-LABEL: @test_atomicrmw_nand_i16_global_system(
167; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
168; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
169; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
170; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
171; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
172; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
173; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
174; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
175; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
176; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
177; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
178; CHECK:       atomicrmw.start:
179; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
180; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
181; CHECK-NEXT:    [[NEW:%.*]] = xor i32 [[TMP5]], -1
182; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]]
183; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
184; CHECK-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]]
185; CHECK-NEXT:    [[TMP9:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] seq_cst seq_cst, align 4
186; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1
187; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0
188; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
189; CHECK:       atomicrmw.end:
190; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
191; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
192; CHECK-NEXT:    ret i16 [[EXTRACTED]]
193;
194  %res = atomicrmw nand ptr addrspace(1) %ptr, i16 %value seq_cst
195  ret i16 %res
196}
197
; i16 `atomicrmw or` on an addrspace(1) pointer with no explicit alignment.
; The expected output has no cmpxchg loop: the operand is zero-extended and
; shifted into its lane (so all bits outside the lane are 0) and a single
; i32 `atomicrmw or` is issued on the ptrmask-aligned word. The old i16 value
; is recovered with lshr + trunc.
198define i16 @test_atomicrmw_or_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
199; CHECK-LABEL: @test_atomicrmw_or_i16_global_system(
200; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
201; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
202; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
203; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
204; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
205; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
206; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
207; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
208; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
209; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw or ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
210; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
211; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
212; CHECK-NEXT:    ret i16 [[EXTRACTED]]
213;
214  %res = atomicrmw or ptr addrspace(1) %ptr, i16 %value seq_cst
215  ret i16 %res
216}
217
; i16 `atomicrmw xor` on an addrspace(1) pointer with no explicit alignment.
; As with the `or` case, the expected output is a single widened i32
; `atomicrmw xor` on the ptrmask-aligned word, using the zero-extended
; operand shifted into its lane; no cmpxchg loop is emitted.
218define i16 @test_atomicrmw_xor_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
219; CHECK-LABEL: @test_atomicrmw_xor_i16_global_system(
220; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
221; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
222; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
223; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
224; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
225; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
226; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
227; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
228; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
229; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw xor ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] seq_cst, align 4
230; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
231; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
232; CHECK-NEXT:    ret i16 [[EXTRACTED]]
233;
234  %res = atomicrmw xor ptr addrspace(1) %ptr, i16 %value seq_cst
235  ret i16 %res
236}
237
; i16 signed `atomicrmw max` on an addrspace(1) pointer with no explicit
; alignment. In the expected expansion the comparison is done at i16 width:
; each loop iteration extracts the lane from the loaded word (lshr + trunc),
; picks the larger value with `icmp sgt` + select, zero-extends and shifts it
; back into place, merges it with the untouched bits, and attempts a 32-bit
; cmpxchg on the aligned word.
238define i16 @test_atomicrmw_max_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
239; CHECK-LABEL: @test_atomicrmw_max_i16_global_system(
240; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
241; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
242; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
243; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
244; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
245; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
246; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
247; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
248; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
249; CHECK:       atomicrmw.start:
250; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
251; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
252; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
253; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i16 [[EXTRACTED]], [[VALUE:%.*]]
254; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
255; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
256; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
257; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
258; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
259; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
260; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
261; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
262; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
263; CHECK:       atomicrmw.end:
264; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
265; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
266; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
267;
268  %res = atomicrmw max ptr addrspace(1) %ptr, i16 %value seq_cst
269  ret i16 %res
270}
271
; i16 signed `atomicrmw min` on an addrspace(1) pointer with no explicit
; alignment. Same expected shape as the signed max case (extract the i16
; lane, compare + select, re-insert, 32-bit cmpxchg retry loop), with the
; comparison being `icmp sle`.
272define i16 @test_atomicrmw_min_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
273; CHECK-LABEL: @test_atomicrmw_min_i16_global_system(
274; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
275; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
276; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
277; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
278; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
279; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
280; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
281; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
282; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
283; CHECK:       atomicrmw.start:
284; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
285; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
286; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
287; CHECK-NEXT:    [[TMP4:%.*]] = icmp sle i16 [[EXTRACTED]], [[VALUE:%.*]]
288; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
289; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
290; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
291; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
292; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
293; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
294; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
295; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
296; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
297; CHECK:       atomicrmw.end:
298; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
299; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
300; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
301;
302  %res = atomicrmw min ptr addrspace(1) %ptr, i16 %value seq_cst
303  ret i16 %res
304}
305
; i16 unsigned `atomicrmw umax` on an addrspace(1) pointer with no explicit
; alignment. Same expected shape as the signed max case (extract the i16
; lane, compare + select, re-insert, 32-bit cmpxchg retry loop), with the
; comparison being `icmp ugt`.
306define i16 @test_atomicrmw_umax_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
307; CHECK-LABEL: @test_atomicrmw_umax_i16_global_system(
308; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
309; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
310; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
311; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
312; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
313; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
314; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
315; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
316; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
317; CHECK:       atomicrmw.start:
318; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
319; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
320; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
321; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
322; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
323; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
324; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
325; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
326; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
327; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
328; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
329; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
330; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
331; CHECK:       atomicrmw.end:
332; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
333; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
334; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
335;
336  %res = atomicrmw umax ptr addrspace(1) %ptr, i16 %value seq_cst
337  ret i16 %res
338}
339
; i16 unsigned `atomicrmw umin` on an addrspace(1) pointer with no explicit
; alignment. Same expected shape as the signed max case (extract the i16
; lane, compare + select, re-insert, 32-bit cmpxchg retry loop), with the
; comparison being `icmp ule`.
340define i16 @test_atomicrmw_umin_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
341; CHECK-LABEL: @test_atomicrmw_umin_i16_global_system(
342; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
343; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
344; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
345; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
346; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
347; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
348; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
349; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
350; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
351; CHECK:       atomicrmw.start:
352; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
353; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
354; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
355; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule i16 [[EXTRACTED]], [[VALUE:%.*]]
356; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
357; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
358; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
359; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
360; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
361; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
362; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
363; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
364; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
365; CHECK:       atomicrmw.end:
366; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
367; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
368; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
369;
370  %res = atomicrmw umin ptr addrspace(1) %ptr, i16 %value seq_cst
371  ret i16 %res
372}
373
; i16 `cmpxchg` on an addrspace(1) pointer (a GEP 4 i16 elements past %out)
; with no explicit alignment; only the loaded value (element 0 of the result
; pair) is returned. The expected output is the part-word cmpxchg expansion:
; both the new value (%in) and the comparand (%old) are zero-extended and
; shifted into the lane, and the neighbouring bits from an initial load of
; the aligned word are ORed into both before a 32-bit cmpxchg. On failure the
; loop retries only if the neighbouring bits differ from the ones just used
; (`icmp ne`); otherwise it exits. The { i16, i1 } result is rebuilt from the
; extracted lane and the success bit.
374define i16 @test_cmpxchg_i16_global_system(ptr addrspace(1) %out, i16 %in, i16 %old) {
375; CHECK-LABEL: @test_cmpxchg_i16_global_system(
376; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr addrspace(1) [[OUT:%.*]], i64 4
377; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[GEP]], i64 -4)
378; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64
379; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
380; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
381; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
382; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
383; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
384; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[IN:%.*]] to i32
385; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
386; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[OLD:%.*]] to i32
387; CHECK-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]]
388; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
389; CHECK-NEXT:    [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]]
390; CHECK-NEXT:    br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
391; CHECK:       partword.cmpxchg.loop:
392; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
393; CHECK-NEXT:    [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]]
394; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]]
395; CHECK-NEXT:    [[TMP12:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] seq_cst seq_cst, align 4
396; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0
397; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1
398; CHECK-NEXT:    br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
399; CHECK:       partword.cmpxchg.failure:
400; CHECK-NEXT:    [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]]
401; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]]
402; CHECK-NEXT:    br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
403; CHECK:       partword.cmpxchg.end:
404; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]]
405; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
406; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0
407; CHECK-NEXT:    [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1
408; CHECK-NEXT:    [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0
409; CHECK-NEXT:    ret i16 [[EXTRACT]]
410;
411  %gep = getelementptr i16, ptr addrspace(1) %out, i64 4
412  %res = cmpxchg ptr addrspace(1) %gep, i16 %old, i16 %in seq_cst seq_cst
413  %extract = extractvalue {i16, i1} %res, 0
414  ret i16 %extract
415}
416
; i16 `cmpxchg` on an addrspace(1) pointer (a GEP 4 i16 elements past %out)
; with `align 4` on the instruction. The expected part-word expansion uses
; the GEP result directly as the word address, with no ptrmask or shift:
; the neighbouring bits are selected with the constant -65536, ORed into the
; zero-extended new value and comparand, and a 32-bit cmpxchg is retried
; only while those neighbouring bits keep changing. The returned i16 is a
; plain trunc of the loaded word.
417define i16 @test_cmpxchg_i16_global_system_align4(ptr addrspace(1) %out, i16 %in, i16 %old) {
418; CHECK-LABEL: @test_cmpxchg_i16_global_system_align4(
419; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr addrspace(1) [[OUT:%.*]], i64 4
420; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[IN:%.*]] to i32
421; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[OLD:%.*]] to i32
422; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
423; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], -65536
424; CHECK-NEXT:    br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
425; CHECK:       partword.cmpxchg.loop:
426; CHECK-NEXT:    [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[TMP11:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
427; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP1]]
428; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP5]], [[TMP2]]
429; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[GEP]], i32 [[TMP7]], i32 [[TMP6]] seq_cst seq_cst, align 4
430; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP8]], 0
431; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
432; CHECK-NEXT:    br i1 [[TMP10]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
433; CHECK:       partword.cmpxchg.failure:
434; CHECK-NEXT:    [[TMP11]] = and i32 [[TMP9]], -65536
435; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP5]], [[TMP11]]
436; CHECK-NEXT:    br i1 [[TMP12]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
437; CHECK:       partword.cmpxchg.end:
438; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP9]] to i16
439; CHECK-NEXT:    [[TMP13:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0
440; CHECK-NEXT:    [[TMP14:%.*]] = insertvalue { i16, i1 } [[TMP13]], i1 [[TMP10]], 1
441; CHECK-NEXT:    [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP14]], 0
442; CHECK-NEXT:    ret i16 [[EXTRACT]]
443;
444  %gep = getelementptr i16, ptr addrspace(1) %out, i64 4
445  %res = cmpxchg ptr addrspace(1) %gep, i16 %old, i16 %in seq_cst seq_cst, align 4
446  %extract = extractvalue {i16, i1} %res, 0
447  ret i16 %extract
448}
449
; i16 `atomicrmw uinc_wrap` (the "inc" in the function name) on an
; addrspace(1) pointer with no explicit alignment. In the expected expansion
; each loop iteration extracts the i16 lane, computes lane + 1, and selects 0
; instead when the current lane value is unsigned-greater-or-equal to %value
; (`icmp uge`); the result is zero-extended, shifted back into place, merged
; with the untouched bits, and written with a 32-bit cmpxchg.
450define i16 @test_atomicrmw_inc_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
451; CHECK-LABEL: @test_atomicrmw_inc_i16_global_system(
452; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
453; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
454; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
455; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
456; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
457; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
458; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
459; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
460; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
461; CHECK:       atomicrmw.start:
462; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
463; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
464; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
465; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1
466; CHECK-NEXT:    [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
467; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]]
468; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
469; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
470; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
471; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
472; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
473; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
474; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
475; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
476; CHECK:       atomicrmw.end:
477; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
478; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
479; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
480;
481  %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i16 %value seq_cst
482  ret i16 %res
483}
484
; i16 `atomicrmw uinc_wrap` on an addrspace(1) pointer with `align 4`. The
; expected expansion works on %ptr directly: the lane is a plain trunc of the
; loaded word, the wrapped increment is computed with add / `icmp uge` /
; select as in the unaligned case, and the result is merged using the
; constant mask -65536 before the 32-bit cmpxchg. No ptrmask or shift is
; emitted.
485define i16 @test_atomicrmw_inc_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
486; CHECK-LABEL: @test_atomicrmw_inc_i16_global_system_align4(
487; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
488; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
489; CHECK:       atomicrmw.start:
490; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
491; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
492; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1
493; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
494; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]]
495; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
496; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
497; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
498; CHECK-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
499; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
500; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
501; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
502; CHECK:       atomicrmw.end:
503; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
504; CHECK-NEXT:    ret i16 [[EXTRACTED1]]
505;
506  %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i16 %value seq_cst, align 4
507  ret i16 %res
508}
509
; uinc_wrap on an i16 behind a plain `ptr`, with no explicit alignment and
; no syncscope. The expected expansion rounds the address down to a 4-byte
; boundary with llvm.ptrmask, turns the low two address bits into a bit
; shift (times 8), and updates the i16 inside its containing i32 with a
; seq_cst cmpxchg loop. Per iteration the new i16 is 0 when the old value
; is uge %value, otherwise old + 1.
510define i16 @test_atomicrmw_inc_i16_flat_system(ptr %ptr, i16 %value) {
511; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_system(
512; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
513; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
514; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
515; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
516; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
517; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
518; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
519; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
520; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
521; CHECK:       atomicrmw.start:
522; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
523; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
524; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
525; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1
526; CHECK-NEXT:    [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
527; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]]
528; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
529; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
530; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
531; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
532; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
533; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
534; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
535; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
536; CHECK:       atomicrmw.end:
537; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
538; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
539; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
540;
541  %res = atomicrmw uinc_wrap ptr %ptr, i16 %value seq_cst
542  ret i16 %res
543}
544
; uinc_wrap on an i16 behind a plain `ptr`, with `align 4` and no syncscope.
; The expected expansion needs no ptrmask or shift: it runs a seq_cst i32
; cmpxchg loop directly on %ptr and keeps the i16 in the low 16 bits (upper
; half preserved via `and ... -65536`). Per iteration the new i16 is 0 when
; the old value is uge %value, otherwise old + 1.
545define i16 @test_atomicrmw_inc_i16_flat_system_align4(ptr %ptr, i16 %value) {
546; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_system_align4(
547; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
548; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
549; CHECK:       atomicrmw.start:
550; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
551; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
552; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1
553; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
554; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]]
555; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
556; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
557; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
558; CHECK-NEXT:    [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
559; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
560; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
561; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
562; CHECK:       atomicrmw.end:
563; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
564; CHECK-NEXT:    ret i16 [[EXTRACTED1]]
565;
566  %res = atomicrmw uinc_wrap ptr %ptr, i16 %value seq_cst, align 4
567  ret i16 %res
568}
569
; udec_wrap on an i16 behind an addrspace(1) pointer, with no explicit
; alignment and no syncscope. The expected expansion rounds the address down
; to a 4-byte boundary with llvm.ptrmask, turns the low two address bits
; into a bit shift (times 8), and updates the i16 inside its containing i32
; with a seq_cst cmpxchg loop. Per iteration the new i16 is %value when the
; old value is 0 or ugt %value, otherwise old - 1.
570define i16 @test_atomicrmw_dec_i16_global_system(ptr addrspace(1) %ptr, i16 %value) {
571; CHECK-LABEL: @test_atomicrmw_dec_i16_global_system(
572; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
573; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
574; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
575; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
576; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
577; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
578; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
579; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
580; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
581; CHECK:       atomicrmw.start:
582; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
583; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
584; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
585; CHECK-NEXT:    [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1
586; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0
587; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
588; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
589; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]]
590; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
591; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
592; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
593; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
594; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
595; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
596; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
597; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
598; CHECK:       atomicrmw.end:
599; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
600; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
601; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
602;
603  %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i16 %value seq_cst
604  ret i16 %res
605}
606
; udec_wrap on an i16 behind an addrspace(1) pointer, with `align 4` and no
; syncscope. The expected expansion needs no ptrmask or shift: it runs a
; seq_cst i32 cmpxchg loop directly on %ptr and keeps the i16 in the low 16
; bits (upper half preserved via `and ... -65536`). Per iteration the new
; i16 is %value when the old value is 0 or ugt %value, otherwise old - 1.
607define i16 @test_atomicrmw_dec_i16_global_system_align4(ptr addrspace(1) %ptr, i16 %value) {
608; CHECK-LABEL: @test_atomicrmw_dec_i16_global_system_align4(
609; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
610; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
611; CHECK:       atomicrmw.start:
612; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
613; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
614; CHECK-NEXT:    [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1
615; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0
616; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
617; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
618; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]]
619; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
620; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
621; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
622; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
623; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
624; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
625; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
626; CHECK:       atomicrmw.end:
627; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
628; CHECK-NEXT:    ret i16 [[EXTRACTED1]]
629;
630  %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i16 %value seq_cst, align 4
631  ret i16 %res
632}
633
; udec_wrap on an i16 behind a plain `ptr`, with no explicit alignment and
; no syncscope. The expected expansion rounds the address down to a 4-byte
; boundary with llvm.ptrmask, turns the low two address bits into a bit
; shift (times 8), and updates the i16 inside its containing i32 with a
; seq_cst cmpxchg loop. Per iteration the new i16 is %value when the old
; value is 0 or ugt %value, otherwise old - 1.
634define i16 @test_atomicrmw_dec_i16_flat_system(ptr %ptr, i16 %value) {
635; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_system(
636; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
637; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
638; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
639; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
640; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
641; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
642; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
643; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
644; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
645; CHECK:       atomicrmw.start:
646; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
647; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
648; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
649; CHECK-NEXT:    [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1
650; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0
651; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
652; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
653; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]]
654; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
655; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
656; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
657; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
658; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
659; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
660; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
661; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
662; CHECK:       atomicrmw.end:
663; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
664; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
665; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
666;
667  %res = atomicrmw udec_wrap ptr %ptr, i16 %value seq_cst
668  ret i16 %res
669}
670
; udec_wrap on an i16 behind a plain `ptr`, with `align 4` and no syncscope.
; The expected expansion needs no ptrmask or shift: it runs a seq_cst i32
; cmpxchg loop directly on %ptr and keeps the i16 in the low 16 bits (upper
; half preserved via `and ... -65536`). Per iteration the new i16 is %value
; when the old value is 0 or ugt %value, otherwise old - 1.
671define i16 @test_atomicrmw_dec_i16_flat_system_align4(ptr %ptr, i16 %value) {
672; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_system_align4(
673; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
674; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
675; CHECK:       atomicrmw.start:
676; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
677; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
678; CHECK-NEXT:    [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1
679; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0
680; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
681; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
682; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]]
683; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
684; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
685; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
686; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
687; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
688; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
689; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
690; CHECK:       atomicrmw.end:
691; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
692; CHECK-NEXT:    ret i16 [[EXTRACTED1]]
693;
694  %res = atomicrmw udec_wrap ptr %ptr, i16 %value seq_cst, align 4
695  ret i16 %res
696}
697
; xchg of a half behind an addrspace(1) pointer, with no explicit alignment
; and no syncscope. The expected expansion rounds the address down to a
; 4-byte boundary with llvm.ptrmask and derives a bit shift from the low two
; address bits. Before the loop the half operand is bitcast to i16,
; zero-extended and shifted into position; the seq_cst i32 cmpxchg loop then
; replaces only the masked 16 bits. The previous bits are shifted down,
; truncated and bitcast back to half for the return value.
698define half @test_atomicrmw_xchg_f16_global_system(ptr addrspace(1) %ptr, half %value) {
699; CHECK-LABEL: @test_atomicrmw_xchg_f16_global_system(
700; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
701; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
702; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
703; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
704; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
705; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
706; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
707; CHECK-NEXT:    [[TMP3:%.*]] = bitcast half [[VALUE:%.*]] to i16
708; CHECK-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
709; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]]
710; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
711; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
712; CHECK:       atomicrmw.start:
713; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
714; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
715; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
716; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
717; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
718; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
719; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
720; CHECK:       atomicrmw.end:
721; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
722; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
723; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to half
724; CHECK-NEXT:    ret half [[TMP9]]
725;
726  %res = atomicrmw xchg ptr addrspace(1) %ptr, half %value seq_cst
727  ret half %res
728}
729
; xchg of a half behind an addrspace(1) pointer, with `align 4` and no
; syncscope. The expected expansion needs no ptrmask or shift: the operand
; is bitcast to i16 and zero-extended, then a seq_cst i32 cmpxchg loop on
; %ptr replaces the low 16 bits (upper half preserved via
; `and ... -65536`). The previous low 16 bits are truncated and bitcast back
; to half for the return value.
730define half @test_atomicrmw_xchg_f16_global_system_align4(ptr addrspace(1) %ptr, half %value) {
731; CHECK-LABEL: @test_atomicrmw_xchg_f16_global_system_align4(
732; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half [[VALUE:%.*]] to i16
733; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
734; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
735; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
736; CHECK:       atomicrmw.start:
737; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
738; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -65536
739; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
740; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
741; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
742; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
743; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
744; CHECK:       atomicrmw.end:
745; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
746; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to half
747; CHECK-NEXT:    ret half [[TMP7]]
748;
749  %res = atomicrmw xchg ptr addrspace(1) %ptr, half %value seq_cst, align 4
750  ret half %res
751}
752
; xchg of a half behind a plain `ptr`, with no explicit alignment and no
; syncscope. The expected expansion rounds the address down to a 4-byte
; boundary with llvm.ptrmask and derives a bit shift from the low two
; address bits. Before the loop the half operand is bitcast to i16,
; zero-extended and shifted into position; the seq_cst i32 cmpxchg loop then
; replaces only the masked 16 bits. The previous bits are shifted down,
; truncated and bitcast back to half for the return value.
753define half @test_atomicrmw_xchg_f16_flat_system(ptr %ptr, half %value) {
754; CHECK-LABEL: @test_atomicrmw_xchg_f16_flat_system(
755; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
756; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
757; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
758; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
759; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
760; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
761; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
762; CHECK-NEXT:    [[TMP3:%.*]] = bitcast half [[VALUE:%.*]] to i16
763; CHECK-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
764; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]]
765; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
766; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
767; CHECK:       atomicrmw.start:
768; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
769; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
770; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
771; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
772; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
773; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
774; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
775; CHECK:       atomicrmw.end:
776; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
777; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
778; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to half
779; CHECK-NEXT:    ret half [[TMP9]]
780;
781  %res = atomicrmw xchg ptr %ptr, half %value seq_cst
782  ret half %res
783}
784
; xchg of a half behind a plain `ptr`, with `align 4` and no syncscope. The
; expected expansion needs no ptrmask or shift: the operand is bitcast to
; i16 and zero-extended, then a seq_cst i32 cmpxchg loop on %ptr replaces
; the low 16 bits (upper half preserved via `and ... -65536`). The previous
; low 16 bits are truncated and bitcast back to half for the return value.
785define half @test_atomicrmw_xchg_f16_flat_system_align4(ptr %ptr, half %value) {
786; CHECK-LABEL: @test_atomicrmw_xchg_f16_flat_system_align4(
787; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half [[VALUE:%.*]] to i16
788; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
789; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[PTR:%.*]], align 4
790; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
791; CHECK:       atomicrmw.start:
792; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
793; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -65536
794; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
795; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
796; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
797; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
798; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
799; CHECK:       atomicrmw.end:
800; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
801; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to half
802; CHECK-NEXT:    ret half [[TMP7]]
803;
804  %res = atomicrmw xchg ptr %ptr, half %value seq_cst, align 4
805  ret half %res
806}
807
; xchg of a bfloat behind a plain `ptr`, with no explicit alignment and no
; syncscope. The expected expansion rounds the address down to a 4-byte
; boundary with llvm.ptrmask and derives a bit shift from the low two
; address bits. Before the loop the bfloat operand is bitcast to i16,
; zero-extended and shifted into position; the seq_cst i32 cmpxchg loop then
; replaces only the masked 16 bits. The previous bits are shifted down,
; truncated and bitcast back to bfloat for the return value.
808define bfloat @test_atomicrmw_xchg_bf16_flat_system(ptr %ptr, bfloat %value) {
809; CHECK-LABEL: @test_atomicrmw_xchg_bf16_flat_system(
810; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
811; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
812; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
813; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
814; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
815; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
816; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
817; CHECK-NEXT:    [[TMP3:%.*]] = bitcast bfloat [[VALUE:%.*]] to i16
818; CHECK-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
819; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]]
820; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
821; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
822; CHECK:       atomicrmw.start:
823; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
824; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
825; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
826; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] seq_cst seq_cst, align 4
827; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
828; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
829; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
830; CHECK:       atomicrmw.end:
831; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
832; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
833; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
834; CHECK-NEXT:    ret bfloat [[TMP9]]
835;
836  %res = atomicrmw xchg ptr %ptr, bfloat %value seq_cst
837  ret bfloat %res
838}
839
; xchg of a bfloat behind a plain `ptr`, with `align 4` and no syncscope.
; The expected expansion needs no ptrmask or shift: the operand is bitcast
; to i16 and zero-extended, then a seq_cst i32 cmpxchg loop on %ptr replaces
; the low 16 bits (upper half preserved via `and ... -65536`). The previous
; low 16 bits are truncated and bitcast back to bfloat for the return value.
840define bfloat @test_atomicrmw_xchg_bf16_flat_system_align4(ptr %ptr, bfloat %value) {
841; CHECK-LABEL: @test_atomicrmw_xchg_bf16_flat_system_align4(
842; CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat [[VALUE:%.*]] to i16
843; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
844; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[PTR:%.*]], align 4
845; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
846; CHECK:       atomicrmw.start:
847; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
848; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -65536
849; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
850; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[TMP5]] seq_cst seq_cst, align 4
851; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
852; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
853; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
854; CHECK:       atomicrmw.end:
855; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
856; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
857; CHECK-NEXT:    ret bfloat [[TMP7]]
858;
859  %res = atomicrmw xchg ptr %ptr, bfloat %value seq_cst, align 4
860  ret bfloat %res
861}
862