xref: /llvm-project/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i16.ll (revision f3afdc4ad980bfba5c196f2248bedf03945cd32a)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Three configurations are checked: a default amdgcn subtarget (BASE), gfx940
; (GFX940) and r600 (R600). The GFX940 run must actually select -mcpu=gfx940;
; without it the command is identical to BASE and the prefix adds no coverage.
; Re-run utils/update_test_checks.py if any expansion differs on gfx940.
2; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand %s | FileCheck -check-prefixes=CHECK,GCN,BASE %s
3; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -S -passes=atomic-expand %s | FileCheck -check-prefixes=CHECK,GCN,GFX940 %s
4; RUN: opt -mtriple=r600-mesa-mesa3d -S -passes=atomic-expand %s | FileCheck  -check-prefixes=CHECK,R600 %s
5
6target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
7
; i16 xchg on a global (addrspace 1) pointer with no explicit alignment.
; Expected: the pointer is rounded down to a 4-byte boundary with llvm.ptrmask,
; a shift amount and lane mask are derived from the low two pointer bits, and
; the exchange is performed by an i32 cmpxchg loop that replaces only the
; masked 16 bits. The old i16 value is recovered by shifting and truncating.
8define i16 @test_atomicrmw_xchg_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
9; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_agent(
10; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
11; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
12; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
13; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
14; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
15; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
16; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
17; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
18; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
19; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
20; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
21; CHECK:       atomicrmw.start:
22; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
23; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
24; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
25; CHECK-NEXT:    [[TMP7:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] syncscope("agent") seq_cst seq_cst, align 4
26; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
27; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
28; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
29; CHECK:       atomicrmw.end:
30; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
31; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
32; CHECK-NEXT:    ret i16 [[EXTRACTED]]
33;
34  %res = atomicrmw xchg ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
35  ret i16 %res
36}
37
; Same i16 xchg, but the atomicrmw is marked align 4.
; Expected: no ptrmask or shift computation; the original pointer is used
; directly for the i32 cmpxchg loop and the lane mask folds to the constant
; -65536 (keep the upper 16 bits, replace the lower 16).
38define i16 @test_atomicrmw_xchg_i16_global_agent_align4(ptr addrspace(1) %ptr, i16 %value) {
39; CHECK-LABEL: @test_atomicrmw_xchg_i16_global_agent_align4(
40; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
41; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
42; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
43; CHECK:       atomicrmw.start:
44; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
45; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[LOADED]], -65536
46; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP1]]
47; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP4]] syncscope("agent") seq_cst seq_cst, align 4
48; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
49; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
50; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
51; CHECK:       atomicrmw.end:
52; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
53; CHECK-NEXT:    ret i16 [[EXTRACTED]]
54;
55  %res = atomicrmw xchg ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4
56  ret i16 %res
57}
58
; i16 add on a global pointer with no explicit alignment.
; Expected: an i32 cmpxchg loop on the 4-byte-aligned containing word. The add
; is done in i32 on the shifted operand, its result is masked back to the
; 16-bit lane, and the remaining bits are taken unchanged from the loaded word.
59define i16 @test_atomicrmw_add_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
60; CHECK-LABEL: @test_atomicrmw_add_i16_global_agent(
61; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
62; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
63; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
64; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
65; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
66; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
67; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
68; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
69; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
70; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
71; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
72; CHECK:       atomicrmw.start:
73; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
74; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
75; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
76; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
77; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
78; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
79; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
80; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
81; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
82; CHECK:       atomicrmw.end:
83; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
84; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
85; CHECK-NEXT:    ret i16 [[EXTRACTED]]
86;
87  %res = atomicrmw add ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
88  ret i16 %res
89}
90
; i16 add with align 4.
; Expected: the i32 cmpxchg loop operates on the original pointer, with the
; lane masks folded to the constants 65535 (result bits) and -65536 (preserved
; bits); no pointer masking or shifting is emitted.
91define i16 @test_atomicrmw_add_i16_global_agent_align4(ptr addrspace(1) %ptr, i16 %value) {
92; CHECK-LABEL: @test_atomicrmw_add_i16_global_agent_align4(
93; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
94; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
95; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
96; CHECK:       atomicrmw.start:
97; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
98; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
99; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[NEW]], 65535
100; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -65536
101; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
102; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
103; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
104; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
105; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
106; CHECK:       atomicrmw.end:
107; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
108; CHECK-NEXT:    ret i16 [[EXTRACTED]]
109;
110  %res = atomicrmw add ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4
111  ret i16 %res
112}
113
; i16 sub on a global pointer with no explicit alignment.
; Expected: the same partword cmpxchg-loop shape as the add case, with an i32
; sub of the shifted operand whose result is masked back into the 16-bit lane.
114define i16 @test_atomicrmw_sub_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
115; CHECK-LABEL: @test_atomicrmw_sub_i16_global_agent(
116; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
117; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
118; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
119; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
120; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
121; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
122; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
123; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
124; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
125; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
126; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
127; CHECK:       atomicrmw.start:
128; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
129; CHECK-NEXT:    [[NEW:%.*]] = sub i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
130; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
131; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
132; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
133; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
134; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
135; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
136; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
137; CHECK:       atomicrmw.end:
138; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
139; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
140; CHECK-NEXT:    ret i16 [[EXTRACTED]]
141;
142  %res = atomicrmw sub ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
143  ret i16 %res
144}
145
; i16 and on a global pointer with no explicit alignment.
; Expected: no cmpxchg loop. The operation is widened to a single i32
; atomicrmw and on the 4-byte-aligned address; the shifted operand is or'd
; with the inverted lane mask so the bits outside the i16 lane are and'ed
; with ones.
146define i16 @test_atomicrmw_and_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
147; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent(
148; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
149; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
150; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
151; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
152; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
153; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
154; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
155; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
156; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
157; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
158; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
159; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
160; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
161; CHECK-NEXT:    ret i16 [[EXTRACTED]]
162;
163  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
164  ret i16 %res
165}
166
; i16 and with align 4.
; Expected: a single i32 atomicrmw and on the original pointer; the operand is
; the zero-extended value or'd with the constant -65536, and the result is
; truncated without any shift.
167define i16 @test_atomicrmw_and_i16_global_agent_align4(ptr addrspace(1) %ptr, i16 %value) {
168; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4(
169; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
170; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
171; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
172; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
173; CHECK-NEXT:    ret i16 [[EXTRACTED]]
174;
175  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4
176  ret i16 %res
177}
178
179; Drop unknown metadata and noundef
; The input atomicrmw carries !noundef and !some.unknown.md; the widened i32
; atomicrmw and in the checks carries no metadata at all.
180define i16 @test_atomicrmw_and_i16_global_agent_drop_md(ptr addrspace(1) %ptr, i16 %value) {
181; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_drop_md(
182; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
183; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
184; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
185; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
186; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
187; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
188; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
189; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
190; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
191; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
192; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
193; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
194; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
195; CHECK-NEXT:    ret i16 [[EXTRACTED]]
196;
197  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, !noundef !0, !some.unknown.md !0
198  ret i16 %res
199}
200
201; Drop unknown metadata and noundef
; align 4 variant: the input carries both !noundef and !some.unknown.md, and
; the widened i32 atomicrmw and in the checks carries neither.
202define i16 @test_atomicrmw_and_i16_global_agent_align4_drop_md(ptr addrspace(1) %ptr, i16 %value) {
203; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_drop_md(
204; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
205; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
206; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
207; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
208; CHECK-NEXT:    ret i16 [[EXTRACTED]]
209;
210  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !noundef !0, !some.unknown.md !0
211  ret i16 %res
212}
213
214; Drop noundef, preserve mmra
; The input carries !noundef and !mmra; the widened i32 atomicrmw and in the
; checks keeps only the !mmra attachment.
215define i16 @test_atomicrmw_and_i16_global_agent_preserve_mmra(ptr addrspace(1) %ptr, i16 %value) {
216; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_preserve_mmra(
217; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
218; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
219; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
220; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
221; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
222; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
223; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
224; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
225; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
226; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
227; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0:![0-9]+]]
228; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
229; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
230; CHECK-NEXT:    ret i16 [[EXTRACTED]]
231;
232  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, !noundef !0, !mmra !1
233  ret i16 %res
234}
235
236; Drop noundef, preserve mmra
; align 4 variant: the widened i32 atomicrmw and keeps !mmra (the same
; metadata node matched as META0 earlier in the file) and loses !noundef.
237define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra(ptr addrspace(1) %ptr, i16 %value) {
238; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_mmra(
239; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
240; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
241; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !mmra [[META0]]
242; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
243; CHECK-NEXT:    ret i16 [[EXTRACTED]]
244;
245  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !noundef !0, !mmra !1
246  ret i16 %res
247}
248
; Preserve alias.scope: the !alias.scope attachment on the i16 atomicrmw is
; expected to appear on the widened i32 atomicrmw and.
249define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_alias_scope(ptr addrspace(1) %ptr, i16 %value) {
250; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_alias_scope(
251; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
252; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
253; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !alias.scope [[META1:![0-9]+]]
254; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
255; CHECK-NEXT:    ret i16 [[EXTRACTED]]
256;
257  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !alias.scope !2
258  ret i16 %res
259}
260
; Preserve noalias: the !noalias attachment on the i16 atomicrmw is expected
; to appear on the widened i32 atomicrmw and (same node as META1 above).
261define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_noalias(ptr addrspace(1) %ptr, i16 %value) {
262; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_noalias(
263; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
264; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
265; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !noalias [[META1]]
266; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
267; CHECK-NEXT:    ret i16 [[EXTRACTED]]
268;
269  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !noalias !2
270  ret i16 %res
271}
272
; Preserve tbaa.struct: the !tbaa.struct attachment on the i16 atomicrmw is
; expected to appear on the widened i32 atomicrmw and.
273define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa_struct(ptr addrspace(1) %ptr, i16 %value) {
274; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa_struct(
275; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
276; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
277; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !tbaa.struct [[TBAA_STRUCT4:![0-9]+]]
278; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
279; CHECK-NEXT:    ret i16 [[EXTRACTED]]
280;
281  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !tbaa.struct !5
282  ret i16 %res
283}
284
; Preserve tbaa: the !tbaa attachment on the i16 atomicrmw is expected to
; appear on the widened i32 atomicrmw and.
285define i16 @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa(ptr addrspace(1) %ptr, i16 %value) {
286; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4_preserve_tbaa(
287; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
288; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
289; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !tbaa [[TBAA5:![0-9]+]]
290; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
291; CHECK-NEXT:    ret i16 [[EXTRACTED]]
292;
293  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !tbaa !6
294  ret i16 %res
295}
296
; Preserve amdgpu.no.remote.memory: with no explicit alignment the i16 and is
; widened to an i32 atomicrmw and on the 4-byte-aligned address, and the
; !amdgpu.no.remote.memory attachment is expected on the widened instruction.
297define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i16 %value) {
298; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent__amdgpu_no_remote_memory(
299; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
300; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
301; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
302; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
303; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
304; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
305; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
306; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
307; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
308; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
309; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META8:![0-9]+]]
310; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
311; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
312; CHECK-NEXT:    ret i16 [[EXTRACTED]]
313;
314  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, !amdgpu.no.remote.memory !0
315  ret i16 %res
316}
317
; Preserve amdgpu.no.remote.memory, align 4 variant: the widened i32
; atomicrmw and on the original pointer keeps the attachment.
318define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(ptr addrspace(1) %ptr, i16 %value) {
319; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_remote_memory(
320; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
321; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
322; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory [[META8]]
323; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
324; CHECK-NEXT:    ret i16 [[EXTRACTED]]
325;
326  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !amdgpu.no.remote.memory !0
327  ret i16 %res
328}
329
; Preserve amdgpu.no.fine.grained.memory: with no explicit alignment the i16
; and is widened to an i32 atomicrmw and on the 4-byte-aligned address, and
; the !amdgpu.no.fine.grained.memory attachment is expected on the widened
; instruction.
330define i16 @test_atomicrmw_and_i16_global_agent__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i16 %value) {
331; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent__amdgpu_no_fine_grained_memory(
332; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
333; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
334; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
335; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
336; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
337; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
338; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
339; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
340; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
341; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
342; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META8]]
343; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
344; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
345; CHECK-NEXT:    ret i16 [[EXTRACTED]]
346;
347  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
348  ret i16 %res
349}
350
; Preserve amdgpu.no.fine.grained.memory, align 4 variant: the widened i32
; atomicrmw and on the original pointer keeps the attachment.
351define i16 @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, i16 %value) {
352; CHECK-LABEL: @test_atomicrmw_and_i16_global_agent_align4__amdgpu_no_fine_grained_memory(
353; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
354; CHECK-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[TMP1]], -65536
355; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw and ptr addrspace(1) [[PTR:%.*]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META8]]
356; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
357; CHECK-NEXT:    ret i16 [[EXTRACTED]]
358;
359  %res = atomicrmw and ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
360  ret i16 %res
361}
362
; i16 nand on a global pointer with no explicit alignment.
; Expected: an i32 cmpxchg loop on the 4-byte-aligned containing word. The
; nand is computed in i32 as and followed by xor -1, its result is masked to
; the 16-bit lane, and the other bits come unchanged from the loaded word.
363define i16 @test_atomicrmw_nand_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
364; CHECK-LABEL: @test_atomicrmw_nand_i16_global_agent(
365; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
366; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
367; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
368; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
369; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
370; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
371; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
372; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
373; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
374; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
375; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
376; CHECK:       atomicrmw.start:
377; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
378; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
379; CHECK-NEXT:    [[NEW:%.*]] = xor i32 [[TMP5]], -1
380; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]]
381; CHECK-NEXT:    [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
382; CHECK-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]]
383; CHECK-NEXT:    [[TMP9:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] syncscope("agent") seq_cst seq_cst, align 4
384; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1
385; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0
386; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
387; CHECK:       atomicrmw.end:
388; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
389; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
390; CHECK-NEXT:    ret i16 [[EXTRACTED]]
391;
392  %res = atomicrmw nand ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
393  ret i16 %res
394}
395
; i16 or on a global pointer with no explicit alignment.
; Expected: no cmpxchg loop. The operation is widened to a single i32
; atomicrmw or on the 4-byte-aligned address using the zero-extended, shifted
; operand directly (the bits outside the i16 lane are or'ed with zero).
396define i16 @test_atomicrmw_or_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
397; CHECK-LABEL: @test_atomicrmw_or_i16_global_agent(
398; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
399; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
400; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
401; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
402; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
403; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
404; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
405; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
406; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
407; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw or ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] syncscope("agent") seq_cst, align 4
408; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
409; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
410; CHECK-NEXT:    ret i16 [[EXTRACTED]]
411;
412  %res = atomicrmw or ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
413  ret i16 %res
414}
415
; i16 xor on a global pointer with no explicit alignment.
; Expected: no cmpxchg loop. The operation is widened to a single i32
; atomicrmw xor on the 4-byte-aligned address using the zero-extended, shifted
; operand directly (the bits outside the i16 lane are xor'ed with zero).
416define i16 @test_atomicrmw_xor_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
417; CHECK-LABEL: @test_atomicrmw_xor_i16_global_agent(
418; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
419; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
420; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
421; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
422; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
423; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
424; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
425; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
426; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
427; CHECK-NEXT:    [[TMP4:%.*]] = atomicrmw xor ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] syncscope("agent") seq_cst, align 4
428; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
429; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
430; CHECK-NEXT:    ret i16 [[EXTRACTED]]
431;
432  %res = atomicrmw xor ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
433  ret i16 %res
434}
435
; i16 signed max on a global pointer with no explicit alignment.
; Expected: an i32 cmpxchg loop on the 4-byte-aligned containing word. Each
; iteration extracts the i16 lane from the loaded word, selects the larger of
; it and %value with icmp sgt, then zero-extends, shifts and re-inserts the
; result into the word. The comparison is done at i16 width, so the operand
; is not pre-shifted outside the loop.
436define i16 @test_atomicrmw_max_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
437; CHECK-LABEL: @test_atomicrmw_max_i16_global_agent(
438; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
439; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
440; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
441; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
442; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
443; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
444; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
445; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
446; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
447; CHECK:       atomicrmw.start:
448; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
449; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
450; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
451; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i16 [[EXTRACTED]], [[VALUE:%.*]]
452; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
453; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
454; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
455; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
456; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
457; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
458; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
459; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
460; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
461; CHECK:       atomicrmw.end:
462; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
463; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
464; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
465;
466  %res = atomicrmw max ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
467  ret i16 %res
468}
469
; i16 signed min on a global pointer with no explicit alignment.
; Expected: the same extract/compare/insert cmpxchg loop as the max case, with
; the selection made by icmp sle between the extracted lane and %value.
470define i16 @test_atomicrmw_min_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
471; CHECK-LABEL: @test_atomicrmw_min_i16_global_agent(
472; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
473; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
474; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
475; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
476; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
477; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
478; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
479; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
480; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
481; CHECK:       atomicrmw.start:
482; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
483; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
484; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
485; CHECK-NEXT:    [[TMP4:%.*]] = icmp sle i16 [[EXTRACTED]], [[VALUE:%.*]]
486; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
487; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
488; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
489; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
490; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
491; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
492; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
493; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
494; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
495; CHECK:       atomicrmw.end:
496; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
497; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
498; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
499;
500  %res = atomicrmw min ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
501  ret i16 %res
502}
503
; Unsigned max on an i16 in global memory (addrspace 1), syncscope("agent"),
; with no explicit alignment on the atomic. The expected output is an i32
; cmpxchg retry loop on the 4-byte-aligned containing word; the i16 lane is
; extracted, combined with the operand via `icmp ugt` + `select`, and
; re-inserted before each cmpxchg attempt.
504define i16 @test_atomicrmw_umax_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
505; CHECK-LABEL: @test_atomicrmw_umax_i16_global_agent(
506; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
507; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
508; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
509; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
510; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
511; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
512; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
513; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
514; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
515; CHECK:       atomicrmw.start:
516; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
517; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
518; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
519; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
520; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
521; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
522; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
523; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
524; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
525; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
526; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
527; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
528; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
529; CHECK:       atomicrmw.end:
530; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
531; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
532; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
533;
534  %res = atomicrmw umax ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
535  ret i16 %res
536}
537
; Unsigned min on an i16 in global memory (addrspace 1), syncscope("agent"),
; with no explicit alignment on the atomic. The expected output is an i32
; cmpxchg retry loop on the 4-byte-aligned containing word; the i16 lane is
; extracted, combined with the operand via `icmp ule` + `select`, and
; re-inserted before each cmpxchg attempt.
538define i16 @test_atomicrmw_umin_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
539; CHECK-LABEL: @test_atomicrmw_umin_i16_global_agent(
540; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
541; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
542; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
543; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
544; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
545; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
546; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
547; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
548; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
549; CHECK:       atomicrmw.start:
550; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
551; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
552; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
553; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule i16 [[EXTRACTED]], [[VALUE:%.*]]
554; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i16 [[EXTRACTED]], i16 [[VALUE]]
555; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
556; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
557; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
558; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
559; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
560; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
561; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
562; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
563; CHECK:       atomicrmw.end:
564; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
565; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
566; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
567;
568  %res = atomicrmw umin ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
569  ret i16 %res
570}
571
; i16 cmpxchg on a global (addrspace 1) pointer offset by four i16 elements,
; with no explicit alignment. The expected output widens to an i32 cmpxchg on
; the 4-byte-aligned containing word: both the new and the expected value are
; zero-extended, shifted into the lane, and OR'd with the other bytes of the
; word. On failure the loop is re-entered only when the bytes outside the i16
; lane differ from the previous attempt (`icmp ne` of the masked words).
; NOTE(review): the function name says "agent", but neither the input cmpxchg
; nor the expected i32 cmpxchg carries a syncscope -- confirm this is intended.
572define i16 @test_cmpxchg_i16_global_agent(ptr addrspace(1) %out, i16 %in, i16 %old) {
573; CHECK-LABEL: @test_cmpxchg_i16_global_agent(
574; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr addrspace(1) [[OUT:%.*]], i64 4
575; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[GEP]], i64 -4)
576; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64
577; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
578; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
579; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
580; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
581; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
582; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[IN:%.*]] to i32
583; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
584; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[OLD:%.*]] to i32
585; CHECK-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]]
586; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
587; CHECK-NEXT:    [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]]
588; CHECK-NEXT:    br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
589; CHECK:       partword.cmpxchg.loop:
590; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
591; CHECK-NEXT:    [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]]
592; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]]
593; CHECK-NEXT:    [[TMP12:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] seq_cst seq_cst, align 4
594; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0
595; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1
596; CHECK-NEXT:    br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
597; CHECK:       partword.cmpxchg.failure:
598; CHECK-NEXT:    [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]]
599; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]]
600; CHECK-NEXT:    br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
601; CHECK:       partword.cmpxchg.end:
602; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]]
603; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
604; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0
605; CHECK-NEXT:    [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1
606; CHECK-NEXT:    [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0
607; CHECK-NEXT:    ret i16 [[EXTRACT]]
608;
609  %gep = getelementptr i16, ptr addrspace(1) %out, i64 4
610  %res = cmpxchg ptr addrspace(1) %gep, i16 %old, i16 %in seq_cst seq_cst
611  %extract = extractvalue {i16, i1} %res, 0
612  ret i16 %extract
613}
614
; Same i16 cmpxchg on a global (addrspace 1) GEP, but the instruction is
; annotated `align 4`. The expected output therefore has no ptrmask and no
; shift computation: the i32 cmpxchg operates directly on the GEP result, the
; operands are only zero-extended, and the lane mask is the constant -65536.
; NOTE(review): the function name says "agent", but neither the input cmpxchg
; nor the expected i32 cmpxchg carries a syncscope -- confirm this is intended.
615define i16 @test_cmpxchg_i16_global_agent_align4(ptr addrspace(1) %out, i16 %in, i16 %old) {
616; CHECK-LABEL: @test_cmpxchg_i16_global_agent_align4(
617; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr addrspace(1) [[OUT:%.*]], i64 4
618; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[IN:%.*]] to i32
619; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[OLD:%.*]] to i32
620; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
621; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[TMP3]], -65536
622; CHECK-NEXT:    br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
623; CHECK:       partword.cmpxchg.loop:
624; CHECK-NEXT:    [[TMP5:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[TMP11:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
625; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP1]]
626; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP5]], [[TMP2]]
627; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[GEP]], i32 [[TMP7]], i32 [[TMP6]] seq_cst seq_cst, align 4
628; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i1 } [[TMP8]], 0
629; CHECK-NEXT:    [[TMP10:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
630; CHECK-NEXT:    br i1 [[TMP10]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
631; CHECK:       partword.cmpxchg.failure:
632; CHECK-NEXT:    [[TMP11]] = and i32 [[TMP9]], -65536
633; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP5]], [[TMP11]]
634; CHECK-NEXT:    br i1 [[TMP12]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
635; CHECK:       partword.cmpxchg.end:
636; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP9]] to i16
637; CHECK-NEXT:    [[TMP13:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0
638; CHECK-NEXT:    [[TMP14:%.*]] = insertvalue { i16, i1 } [[TMP13]], i1 [[TMP10]], 1
639; CHECK-NEXT:    [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP14]], 0
640; CHECK-NEXT:    ret i16 [[EXTRACT]]
641;
642  %gep = getelementptr i16, ptr addrspace(1) %out, i64 4
643  %res = cmpxchg ptr addrspace(1) %gep, i16 %old, i16 %in seq_cst seq_cst, align 4
644  %extract = extractvalue {i16, i1} %res, 0
645  ret i16 %extract
646}
647
; i16 xchg on a local (addrspace 3) pointer, no syncscope and no explicit
; alignment. Address arithmetic in the expected output is done in i32
; (ptrmask.p3.i32 / ptrtoint to i32), so the shift amount is used directly
; without a trunc. The operand is zero-extended and pre-shifted once before
; the loop; each iteration clears the lane with the inverted mask, ORs the
; shifted operand in, and attempts an i32 cmpxchg.
648define i16 @test_atomicrmw_xchg_i16_local(ptr addrspace(3) %ptr, i16 %value) {
649; CHECK-LABEL: @test_atomicrmw_xchg_i16_local(
650; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
651; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
652; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
653; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
654; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[TMP2]]
655; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
656; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
657; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
658; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
659; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
660; CHECK:       atomicrmw.start:
661; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
662; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
663; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
664; CHECK-NEXT:    [[TMP7:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] seq_cst seq_cst, align 4
665; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
666; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
667; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
668; CHECK:       atomicrmw.end:
669; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
670; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
671; CHECK-NEXT:    ret i16 [[EXTRACTED]]
672;
673  %res = atomicrmw xchg ptr addrspace(3) %ptr, i16 %value seq_cst
674  ret i16 %res
675}
676
; i16 cmpxchg on a local (addrspace 3) pointer offset by four i16 elements,
; no syncscope and no explicit alignment. The expected output is the partword
; cmpxchg loop with i32 address arithmetic (ptrmask.p3.i32): new and expected
; values are zero-extended and shifted into the lane, and a failed attempt
; loops again only when the bytes outside the lane changed.
677define i16 @test_cmpxchg_i16_local(ptr addrspace(3) %out, i16 %in, i16 %old) {
678; CHECK-LABEL: @test_cmpxchg_i16_local(
679; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i16, ptr addrspace(3) [[OUT:%.*]], i64 4
680; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[GEP]], i32 -4)
681; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[GEP]] to i32
682; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
683; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
684; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[TMP2]]
685; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
686; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[IN:%.*]] to i32
687; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[TMP3]], [[TMP2]]
688; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[OLD:%.*]] to i32
689; CHECK-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP5]], [[TMP2]]
690; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
691; CHECK-NEXT:    [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]]
692; CHECK-NEXT:    br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
693; CHECK:       partword.cmpxchg.loop:
694; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
695; CHECK-NEXT:    [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]]
696; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]]
697; CHECK-NEXT:    [[TMP12:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] seq_cst seq_cst, align 4
698; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0
699; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1
700; CHECK-NEXT:    br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
701; CHECK:       partword.cmpxchg.failure:
702; CHECK-NEXT:    [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]]
703; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]]
704; CHECK-NEXT:    br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
705; CHECK:       partword.cmpxchg.end:
706; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[TMP2]]
707; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
708; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i16, i1 } poison, i16 [[EXTRACTED]], 0
709; CHECK-NEXT:    [[TMP18:%.*]] = insertvalue { i16, i1 } [[TMP17]], i1 [[TMP14]], 1
710; CHECK-NEXT:    [[EXTRACT:%.*]] = extractvalue { i16, i1 } [[TMP18]], 0
711; CHECK-NEXT:    ret i16 [[EXTRACT]]
712;
713  %gep = getelementptr i16, ptr addrspace(3) %out, i64 4
714  %res = cmpxchg ptr addrspace(3) %gep, i16 %old, i16 %in seq_cst seq_cst
715  %extract = extractvalue {i16, i1} %res, 0
716  ret i16 %extract
717}
718
; i16 xor on a local (addrspace 3) pointer annotated `align 4`. No cmpxchg
; loop is expected here: the output is a single i32 `atomicrmw xor` on the
; original pointer with the operand zero-extended, and the result truncated
; back to i16.
719define i16 @test_atomicrmw_xor_i16_local_align4(ptr addrspace(3) %ptr, i16 %value) {
720; CHECK-LABEL: @test_atomicrmw_xor_i16_local_align4(
721; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
722; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw xor ptr addrspace(3) [[PTR:%.*]], i32 [[TMP1]] seq_cst, align 4
723; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[TMP2]] to i16
724; CHECK-NEXT:    ret i16 [[EXTRACTED]]
725;
726  %res = atomicrmw xor ptr addrspace(3) %ptr, i16 %value seq_cst, align 4
727  ret i16 %res
728}
729
; i16 uinc_wrap on a global (addrspace 1) pointer, syncscope("agent"), no
; explicit alignment. The expected output is an i32 cmpxchg retry loop on the
; 4-byte-aligned containing word. Per iteration the i16 lane is extracted and
; the new value is computed as `select (lane uge value), 0, lane + 1` before
; being shifted back into the word.
730define i16 @test_atomicrmw_inc_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
731; CHECK-LABEL: @test_atomicrmw_inc_i16_global_agent(
732; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
733; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
734; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
735; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
736; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
737; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
738; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
739; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
740; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
741; CHECK:       atomicrmw.start:
742; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
743; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
744; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
745; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1
746; CHECK-NEXT:    [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
747; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]]
748; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
749; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
750; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
751; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
752; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
753; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
754; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
755; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
756; CHECK:       atomicrmw.end:
757; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
758; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
759; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
760;
761  %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
762  ret i16 %res
763}
764
; i16 uinc_wrap on a global (addrspace 1) pointer, syncscope("agent"), with
; the atomic annotated `align 4`. The expected cmpxchg loop works on the
; original pointer with no ptrmask or shift: the lane is obtained by a plain
; trunc and re-inserted using the constant mask -65536.
765define i16 @test_atomicrmw_inc_i16_global_agent_align4(ptr addrspace(1) %ptr, i16 %value) {
766; CHECK-LABEL: @test_atomicrmw_inc_i16_global_agent_align4(
767; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
768; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
769; CHECK:       atomicrmw.start:
770; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
771; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
772; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1
773; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
774; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]]
775; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
776; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
777; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
778; CHECK-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
779; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
780; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
781; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
782; CHECK:       atomicrmw.end:
783; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
784; CHECK-NEXT:    ret i16 [[EXTRACTED1]]
785;
786  %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4
787  ret i16 %res
788}
789
; i16 uinc_wrap on a local (addrspace 3) pointer, syncscope("agent"), no
; explicit alignment. The expected output is an i32 cmpxchg retry loop on the
; 4-byte-aligned containing word, with the address arithmetic done in i32
; (ptrmask.p3.i32) so the shift amount needs no trunc. The syncscope of the
; input atomic is carried over to the expected cmpxchg.
790define i16 @test_atomicrmw_inc_i16_local(ptr addrspace(3) %ptr, i16 %value) {
791; CHECK-LABEL: @test_atomicrmw_inc_i16_local(
792; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
793; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
794; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
795; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
796; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[TMP2]]
797; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
798; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
799; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
800; CHECK:       atomicrmw.start:
801; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
802; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
803; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
804; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1
805; CHECK-NEXT:    [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
806; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]]
807; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
808; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
809; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
810; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
811; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
812; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
813; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
814; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
815; CHECK:       atomicrmw.end:
816; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
817; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
818; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
819;
820  %res = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i16 %value syncscope("agent") seq_cst
821  ret i16 %res
822}
823
; i16 uinc_wrap on a local (addrspace 3) pointer, syncscope("agent"), with the
; atomic annotated `align 4`. The expected cmpxchg loop works on the original
; pointer with no ptrmask or shift: the lane is obtained by a plain trunc and
; re-inserted using the constant mask -65536.
824define i16 @test_atomicrmw_inc_i16_local_align4(ptr addrspace(3) %ptr, i16 %value) {
825; CHECK-LABEL: @test_atomicrmw_inc_i16_local_align4(
826; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(3) [[PTR:%.*]], align 4
827; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
828; CHECK:       atomicrmw.start:
829; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
830; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
831; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1
832; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
833; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]]
834; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
835; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
836; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
837; CHECK-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
838; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
839; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
840; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
841; CHECK:       atomicrmw.end:
842; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
843; CHECK-NEXT:    ret i16 [[EXTRACTED1]]
844;
845  %res = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i16 %value syncscope("agent") seq_cst, align 4
846  ret i16 %res
847}
848
; i16 uinc_wrap on a flat (default address space) pointer, syncscope("agent"),
; no explicit alignment. The expected output uses ptrmask.p0.i64 to reach the
; 4-byte-aligned containing word, computes the lane shift in i64 and truncates
; it to i32, then retries an i32 cmpxchg with the same increment-or-wrap
; `select` as the other uinc_wrap cases.
849define i16 @test_atomicrmw_inc_i16_flat_agent(ptr %ptr, i16 %value) {
850; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_agent(
851; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
852; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
853; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
854; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
855; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
856; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
857; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
858; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
859; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
860; CHECK:       atomicrmw.start:
861; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
862; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
863; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
864; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[EXTRACTED]], 1
865; CHECK-NEXT:    [[TMP5:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
866; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i16 0, i16 [[TMP4]]
867; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
868; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
869; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
870; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
871; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
872; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
873; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
874; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
875; CHECK:       atomicrmw.end:
876; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
877; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
878; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
879;
880  %res = atomicrmw uinc_wrap ptr %ptr, i16 %value syncscope("agent") seq_cst
881  ret i16 %res
882}
883
; i16 uinc_wrap on a flat (default address space) pointer, syncscope("agent"),
; with the atomic annotated `align 4`. The expected cmpxchg loop works on the
; original pointer with no ptrmask or shift: the lane is obtained by a plain
; trunc and re-inserted using the constant mask -65536.
884define i16 @test_atomicrmw_inc_i16_flat_agent_align4(ptr %ptr, i16 %value) {
885; CHECK-LABEL: @test_atomicrmw_inc_i16_flat_agent_align4(
886; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
887; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
888; CHECK:       atomicrmw.start:
889; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
890; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
891; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[EXTRACTED]], 1
892; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i16 [[EXTRACTED]], [[VALUE:%.*]]
893; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]]
894; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
895; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
896; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
897; CHECK-NEXT:    [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
898; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
899; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
900; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
901; CHECK:       atomicrmw.end:
902; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
903; CHECK-NEXT:    ret i16 [[EXTRACTED1]]
904;
905  %res = atomicrmw uinc_wrap ptr %ptr, i16 %value syncscope("agent") seq_cst, align 4
906  ret i16 %res
907}
908
; i16 udec_wrap on a global (addrspace 1) pointer, syncscope("agent"), no
; explicit alignment. The expected output is an i32 cmpxchg retry loop on the
; 4-byte-aligned containing word. Per iteration the new lane value is
; `select (lane == 0 or lane ugt value), value, lane - 1`, which is then
; shifted back into the loaded word.
909define i16 @test_atomicrmw_dec_i16_global_agent(ptr addrspace(1) %ptr, i16 %value) {
910; CHECK-LABEL: @test_atomicrmw_dec_i16_global_agent(
911; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
912; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
913; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
914; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
915; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
916; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
917; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
918; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
919; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
920; CHECK:       atomicrmw.start:
921; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
922; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
923; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
924; CHECK-NEXT:    [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1
925; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0
926; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
927; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
928; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]]
929; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
930; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
931; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
932; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
933; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
934; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
935; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
936; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
937; CHECK:       atomicrmw.end:
938; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
939; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
940; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
941;
942  %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst
943  ret i16 %res
944}
945
; i16 udec_wrap on a global (addrspace 1) pointer, syncscope("agent"), with
; the atomic annotated `align 4`. The expected cmpxchg loop works on the
; original pointer with no ptrmask or shift: the lane is obtained by a plain
; trunc and re-inserted using the constant mask -65536.
946define i16 @test_atomicrmw_dec_i16_global_agent_align4(ptr addrspace(1) %ptr, i16 %value) {
947; CHECK-LABEL: @test_atomicrmw_dec_i16_global_agent_align4(
948; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
949; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
950; CHECK:       atomicrmw.start:
951; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
952; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
953; CHECK-NEXT:    [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1
954; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0
955; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
956; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
957; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]]
958; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
959; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
960; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
961; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
962; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
963; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
964; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
965; CHECK:       atomicrmw.end:
966; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
967; CHECK-NEXT:    ret i16 [[EXTRACTED1]]
968;
969  %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i16 %value syncscope("agent") seq_cst, align 4
970  ret i16 %res
971}
972
; i16 udec_wrap on a local (addrspace 3) pointer, no syncscope and no explicit
; alignment. The expected output is an i32 cmpxchg retry loop on the
; 4-byte-aligned containing word, with the address arithmetic done in i32
; (ptrmask.p3.i32). Per iteration the new lane value is
; `select (lane == 0 or lane ugt value), value, lane - 1`. The expected
; cmpxchg carries no syncscope, matching the input atomic.
973define i16 @test_atomicrmw_dec_i16_local(ptr addrspace(3) %ptr, i16 %value) {
974; CHECK-LABEL: @test_atomicrmw_dec_i16_local(
975; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
976; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
977; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
978; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
979; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[TMP2]]
980; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
981; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
982; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
983; CHECK:       atomicrmw.start:
984; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
985; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
986; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
987; CHECK-NEXT:    [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1
988; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0
989; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
990; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
991; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]]
992; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
993; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
994; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
995; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
996; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
997; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
998; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
999; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1000; CHECK:       atomicrmw.end:
1001; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1002; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
1003; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
1004;
1005  %res = atomicrmw udec_wrap ptr addrspace(3) %ptr, i16 %value seq_cst
1006  ret i16 %res
1007}
1008
; udec_wrap on an i16 in address space 3 with an explicit `align 4`, seq_cst
; ordering and no syncscope. The expected expansion needs no pointer masking or
; shifting: the i32 cmpxchg loop operates directly on %ptr and the i16 lives in
; the low 16 bits of the word (the untouched bits are kept with the constant
; mask -65536).
1009define i16 @test_atomicrmw_dec_i16_local_align4(ptr addrspace(3) %ptr, i16 %value) {
1010; CHECK-LABEL: @test_atomicrmw_dec_i16_local_align4(
1011; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(3) [[PTR:%.*]], align 4
1012; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1013; CHECK:       atomicrmw.start:
1014; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1015; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
1016; CHECK-NEXT:    [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1
1017; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0
1018; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
1019; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
1020; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]]
1021; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
1022; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
1023; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
1024; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
1025; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1026; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1027; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1028; CHECK:       atomicrmw.end:
1029; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
1030; CHECK-NEXT:    ret i16 [[EXTRACTED1]]
1031;
1032  %res = atomicrmw udec_wrap ptr addrspace(3) %ptr, i16 %value seq_cst, align 4
1033  ret i16 %res
1034}
1035
; udec_wrap on an i16 through a plain `ptr` (address space 0) with no explicit
; alignment, syncscope("agent") and seq_cst ordering. The expected expansion
; computes the 4-byte-aligned address with a 64-bit ptrmask, derives the bit
; offset from the low two pointer bits (truncated to i32), and performs the
; update inside an i32 cmpxchg loop that keeps the agent scope.
1036define i16 @test_atomicrmw_dec_i16_flat_agent(ptr %ptr, i16 %value) {
1037; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_agent(
1038; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
1039; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
1040; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
1041; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
1042; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
1043; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
1044; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1045; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
1046; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1047; CHECK:       atomicrmw.start:
1048; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1049; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
1050; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
1051; CHECK-NEXT:    [[TMP4:%.*]] = sub i16 [[EXTRACTED]], 1
1052; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i16 [[EXTRACTED]], 0
1053; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
1054; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
1055; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i16 [[VALUE]], i16 [[TMP4]]
1056; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
1057; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
1058; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1059; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1060; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1061; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1062; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1063; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1064; CHECK:       atomicrmw.end:
1065; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
1066; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i16
1067; CHECK-NEXT:    ret i16 [[EXTRACTED3]]
1068;
1069  %res = atomicrmw udec_wrap ptr %ptr, i16 %value syncscope("agent") seq_cst
1070  ret i16 %res
1071}
1072
; udec_wrap on an i16 through a plain `ptr` (address space 0) with an explicit
; `align 4`, syncscope("agent") and seq_cst ordering. With the stronger
; alignment the expected expansion drops the ptrmask/shift sequence: the i32
; cmpxchg loop uses %ptr directly and treats the low 16 bits as the i16.
1073define i16 @test_atomicrmw_dec_i16_flat_agent_align4(ptr %ptr, i16 %value) {
1074; CHECK-LABEL: @test_atomicrmw_dec_i16_flat_agent_align4(
1075; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
1076; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1077; CHECK:       atomicrmw.start:
1078; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1079; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i16
1080; CHECK-NEXT:    [[TMP2:%.*]] = sub i16 [[EXTRACTED]], 1
1081; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i16 [[EXTRACTED]], 0
1082; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i16 [[EXTRACTED]], [[VALUE:%.*]]
1083; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
1084; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i16 [[VALUE]], i16 [[TMP2]]
1085; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i16 [[NEW]] to i32
1086; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -65536
1087; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
1088; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1089; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1090; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1091; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1092; CHECK:       atomicrmw.end:
1093; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i16
1094; CHECK-NEXT:    ret i16 [[EXTRACTED1]]
1095;
1096  %res = atomicrmw udec_wrap ptr %ptr, i16 %value syncscope("agent") seq_cst, align 4
1097  ret i16 %res
1098}
1099
; xchg of a `half` in address space 1 with no explicit alignment,
; syncscope("agent") and seq_cst ordering. The expected expansion bitcasts the
; operand to i16, zero-extends and shifts it into position once before the
; loop, swaps it into the containing 4-byte word with an i32 cmpxchg loop, and
; bitcasts the extracted old i16 back to `half` for the return value.
1100define half @test_atomicrmw_xchg_f16_global_agent(ptr addrspace(1) %ptr, half %value) {
1101; CHECK-LABEL: @test_atomicrmw_xchg_f16_global_agent(
1102; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
1103; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
1104; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
1105; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
1106; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
1107; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
1108; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1109; CHECK-NEXT:    [[TMP3:%.*]] = bitcast half [[VALUE:%.*]] to i16
1110; CHECK-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
1111; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]]
1112; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
1113; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1114; CHECK:       atomicrmw.start:
1115; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1116; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1117; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
1118; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
1119; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1120; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1121; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1122; CHECK:       atomicrmw.end:
1123; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
1124; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
1125; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to half
1126; CHECK-NEXT:    ret half [[TMP9]]
1127;
1128  %res = atomicrmw xchg ptr addrspace(1) %ptr, half %value syncscope("agent") seq_cst
1129  ret half %res
1130}
1131
; xchg of a `half` in address space 1 with an explicit `align 4`,
; syncscope("agent") and seq_cst ordering. The expected expansion has no
; pointer masking or shifting: the operand is bitcast to i16 and zero-extended,
; then merged into the low 16 bits of the word at %ptr inside an i32 cmpxchg
; loop; the old low 16 bits are bitcast back to `half` and returned.
1132define half @test_atomicrmw_xchg_f16_global_agent_align4(ptr addrspace(1) %ptr, half %value) {
1133; CHECK-LABEL: @test_atomicrmw_xchg_f16_global_agent_align4(
1134; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half [[VALUE:%.*]] to i16
1135; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
1136; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
1137; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1138; CHECK:       atomicrmw.start:
1139; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1140; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -65536
1141; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
1142; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
1143; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1144; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1145; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1146; CHECK:       atomicrmw.end:
1147; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
1148; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to half
1149; CHECK-NEXT:    ret half [[TMP7]]
1150;
1151  %res = atomicrmw xchg ptr addrspace(1) %ptr, half %value syncscope("agent") seq_cst, align 4
1152  ret half %res
1153}
1154
; xchg of a `half` through a plain `ptr` (address space 0) with no explicit
; alignment, syncscope("agent") and seq_cst ordering. The expected expansion
; aligns the address with a 64-bit ptrmask, shifts the bitcast/zero-extended
; operand to the element's bit offset, and exchanges it via an i32 cmpxchg
; loop; the previous i16 is extracted and bitcast back to `half`.
1155define half @test_atomicrmw_xchg_f16_flat_agent(ptr %ptr, half %value) {
1156; CHECK-LABEL: @test_atomicrmw_xchg_f16_flat_agent(
1157; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
1158; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
1159; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
1160; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
1161; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
1162; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
1163; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1164; CHECK-NEXT:    [[TMP3:%.*]] = bitcast half [[VALUE:%.*]] to i16
1165; CHECK-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
1166; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]]
1167; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
1168; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1169; CHECK:       atomicrmw.start:
1170; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1171; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1172; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
1173; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
1174; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1175; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1176; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1177; CHECK:       atomicrmw.end:
1178; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
1179; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
1180; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to half
1181; CHECK-NEXT:    ret half [[TMP9]]
1182;
1183  %res = atomicrmw xchg ptr %ptr, half %value syncscope("agent") seq_cst
1184  ret half %res
1185}
1186
; xchg of a `half` through a plain `ptr` (address space 0) with an explicit
; `align 4`, syncscope("agent") and seq_cst ordering. The expected expansion
; works directly on %ptr: the zero-extended i16 bit pattern replaces the low 16
; bits of the loaded word inside an i32 cmpxchg loop, and the old low 16 bits
; are returned as `half`.
1187define half @test_atomicrmw_xchg_f16_flat_agent_align4(ptr %ptr, half %value) {
1188; CHECK-LABEL: @test_atomicrmw_xchg_f16_flat_agent_align4(
1189; CHECK-NEXT:    [[TMP1:%.*]] = bitcast half [[VALUE:%.*]] to i16
1190; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
1191; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[PTR:%.*]], align 4
1192; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1193; CHECK:       atomicrmw.start:
1194; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1195; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -65536
1196; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
1197; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
1198; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1199; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1200; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1201; CHECK:       atomicrmw.end:
1202; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
1203; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to half
1204; CHECK-NEXT:    ret half [[TMP7]]
1205;
1206  %res = atomicrmw xchg ptr %ptr, half %value syncscope("agent") seq_cst, align 4
1207  ret half %res
1208}
1209
; xchg of a `bfloat` in address space 1 with no explicit alignment,
; syncscope("agent") and seq_cst ordering. The expected expansion mirrors the
; 16-bit integer path: the operand is bitcast to i16, widened and shifted to
; its bit offset, exchanged through an i32 cmpxchg loop on the 4-byte-aligned
; address, and the old i16 is bitcast back to `bfloat`.
1210define bfloat @test_atomicrmw_xchg_bf16_global_agent(ptr addrspace(1) %ptr, bfloat %value) {
1211; CHECK-LABEL: @test_atomicrmw_xchg_bf16_global_agent(
1212; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
1213; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
1214; CHECK-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
1215; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
1216; CHECK-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
1217; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[SHIFTAMT]]
1218; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1219; CHECK-NEXT:    [[TMP3:%.*]] = bitcast bfloat [[VALUE:%.*]] to i16
1220; CHECK-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
1221; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP4]], [[SHIFTAMT]]
1222; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
1223; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1224; CHECK:       atomicrmw.start:
1225; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP5]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1226; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1227; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[VALOPERAND_SHIFTED]]
1228; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
1229; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1230; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1231; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1232; CHECK:       atomicrmw.end:
1233; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
1234; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
1235; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
1236; CHECK-NEXT:    ret bfloat [[TMP9]]
1237;
1238  %res = atomicrmw xchg ptr addrspace(1) %ptr, bfloat %value syncscope("agent") seq_cst
1239  ret bfloat %res
1240}
1241
; xchg of a `bfloat` in address space 1 with an explicit `align 4`,
; syncscope("agent") and seq_cst ordering. The expected expansion skips the
; address masking and shifting: the zero-extended i16 bit pattern replaces the
; low 16 bits of the word at %ptr in an i32 cmpxchg loop, and the old low 16
; bits are returned as `bfloat`.
1242define bfloat @test_atomicrmw_xchg_bf16_global_agent_align4(ptr addrspace(1) %ptr, bfloat %value) {
1243; CHECK-LABEL: @test_atomicrmw_xchg_bf16_global_agent_align4(
1244; CHECK-NEXT:    [[TMP1:%.*]] = bitcast bfloat [[VALUE:%.*]] to i16
1245; CHECK-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
1246; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
1247; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1248; CHECK:       atomicrmw.start:
1249; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1250; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -65536
1251; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]]
1252; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
1253; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1254; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1255; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1256; CHECK:       atomicrmw.end:
1257; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
1258; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[EXTRACTED]] to bfloat
1259; CHECK-NEXT:    ret bfloat [[TMP7]]
1260;
1261  %res = atomicrmw xchg ptr addrspace(1) %ptr, bfloat %value syncscope("agent") seq_cst, align 4
1262  ret bfloat %res
1263}
1264
; xchg of an i16 in address space 7 with no explicit alignment,
; syncscope("agent") and seq_cst ordering. The expected expansion does its
; address arithmetic in i32 (ptrmask.p7.i32 and ptrtoint to i32, so no trunc of
; the shift amount is needed) and exchanges the value through an i32 cmpxchg
; loop on the 4-byte-aligned address.
1265define i16 @test_atomicrmw_xchg_i16_buffer_fat_agent(ptr addrspace(7) %ptr, i16 %value) {
1266; CHECK-LABEL: @test_atomicrmw_xchg_i16_buffer_fat_agent(
1267; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) [[PTR:%.*]], i32 -4)
1268; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(7) [[PTR]] to i32
1269; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1270; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1271; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[TMP2]]
1272; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1273; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
1274; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
1275; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(7) [[ALIGNEDADDR]], align 4
1276; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1277; CHECK:       atomicrmw.start:
1278; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1279; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1280; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
1281; CHECK-NEXT:    [[TMP7:%.*]] = cmpxchg ptr addrspace(7) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] syncscope("agent") seq_cst seq_cst, align 4
1282; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
1283; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
1284; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1285; CHECK:       atomicrmw.end:
1286; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1287; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
1288; CHECK-NEXT:    ret i16 [[EXTRACTED]]
1289;
1290  %res = atomicrmw xchg ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst
1291  ret i16 %res
1292}
1293
; xchg of an i16 in address space 7 with an explicit `align 4`,
; syncscope("agent") and seq_cst ordering. The expected expansion uses %ptr
; directly: the zero-extended operand replaces the low 16 bits of the loaded
; word inside an i32 cmpxchg loop, and the old low 16 bits are returned.
1294define i16 @test_atomicrmw_xchg_i16_buffer_fat_agent_align4(ptr addrspace(7) %ptr, i16 %value) {
1295; CHECK-LABEL: @test_atomicrmw_xchg_i16_buffer_fat_agent_align4(
1296; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
1297; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(7) [[PTR:%.*]], align 4
1298; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1299; CHECK:       atomicrmw.start:
1300; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1301; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[LOADED]], -65536
1302; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP1]]
1303; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(7) [[PTR]], i32 [[LOADED]], i32 [[TMP4]] syncscope("agent") seq_cst seq_cst, align 4
1304; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
1305; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
1306; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1307; CHECK:       atomicrmw.end:
1308; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
1309; CHECK-NEXT:    ret i16 [[EXTRACTED]]
1310;
1311  %res = atomicrmw xchg ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst, align 4
1312  ret i16 %res
1313}
1314
; add on an i16 in address space 7 with no explicit alignment,
; syncscope("agent") and seq_cst ordering. The expected expansion adds the
; pre-shifted operand to the whole loaded i32, keeps only the bits under the
; element mask, merges them with the untouched bits of the loaded word, and
; commits the result with an i32 cmpxchg loop on the 4-byte-aligned address.
1315define i16 @test_atomicrmw_add_i16_buffer_fat_agent(ptr addrspace(7) %ptr, i16 %value) {
1316; CHECK-LABEL: @test_atomicrmw_add_i16_buffer_fat_agent(
1317; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) [[PTR:%.*]], i32 -4)
1318; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(7) [[PTR]] to i32
1319; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1320; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1321; CHECK-NEXT:    [[MASK:%.*]] = shl i32 65535, [[TMP2]]
1322; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1323; CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[VALUE:%.*]] to i32
1324; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
1325; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(7) [[ALIGNEDADDR]], align 4
1326; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1327; CHECK:       atomicrmw.start:
1328; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1329; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
1330; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
1331; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1332; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
1333; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(7) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
1334; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1335; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1336; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1337; CHECK:       atomicrmw.end:
1338; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1339; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i16
1340; CHECK-NEXT:    ret i16 [[EXTRACTED]]
1341;
1342  %res = atomicrmw add ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst
1343  ret i16 %res
1344}
1345
; add on an i16 in address space 7 with an explicit `align 4`,
; syncscope("agent") and seq_cst ordering. The expected expansion uses %ptr
; directly with constant masks: the sum is reduced to its low 16 bits (65535),
; combined with the upper 16 bits of the loaded word (-65536), and stored via
; an i32 cmpxchg loop; the old low 16 bits are returned.
1346define i16 @test_atomicrmw_add_i16_buffer_fat_agent_align4(ptr addrspace(7) %ptr, i16 %value) {
1347; CHECK-LABEL: @test_atomicrmw_add_i16_buffer_fat_agent_align4(
1348; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[VALUE:%.*]] to i32
1349; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(7) [[PTR:%.*]], align 4
1350; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1351; CHECK:       atomicrmw.start:
1352; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1353; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
1354; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[NEW]], 65535
1355; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -65536
1356; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
1357; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(7) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
1358; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1359; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1360; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1361; CHECK:       atomicrmw.end:
1362; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i16
1363; CHECK-NEXT:    ret i16 [[EXTRACTED]]
1364;
1365  %res = atomicrmw add ptr addrspace(7) %ptr, i16 %value syncscope("agent") seq_cst, align 4
1366  ret i16 %res
1367}
1368
; Module-level metadata nodes. None of the functions from
; @test_atomicrmw_dec_i16_local to the end of the file attaches any of them;
; presumably they are used by tests earlier in the file -- TODO confirm before
; removing or renumbering any node.
1369!0 = !{}
1370!1 = !{!"foo", !"bar"}
1371!2 = !{!3}
1372!3 = distinct !{!3, !4}
1373!4 = distinct !{!4}
1374!5 = !{i64 0, i64 4, !1, i64 8, i64 4}
1375!6 = !{!7, !7, i64 0}
1376!7 = !{!"omnipotent char", !8, i64 0}
1377!8 = !{!"Simple C/C++ TBAA"}
1378
1379;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1380; BASE: {{.*}}
1381; GCN: {{.*}}
1382; GFX940: {{.*}}
1383; R600: {{.*}}
1384