xref: /llvm-project/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-i8.ll (revision f3afdc4ad980bfba5c196f2248bedf03945cd32a)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=atomic-expand %s | FileCheck %s --check-prefixes=CHECK,GCN
3; RUN: opt -mtriple=r600-mesa-mesa3d -S -passes=atomic-expand %s | FileCheck %s --check-prefixes=CHECK,R600
4
; i8 `atomicrmw xchg` on a global (addrspace 1) pointer, agent scope, seq_cst,
; with no explicit alignment. The assertions below expect the pass to widen
; the operation to the enclosing 32-bit word: the pointer is masked with -4,
; a shift amount and byte mask are derived from the low two address bits, and
; an i32 cmpxchg loop splices the zero-extended new byte into the loaded word.
; The returned i8 is shifted out of the last loaded word and truncated.
; The amdgcn run derives the shift from a 64-bit ptrtoint followed by a trunc;
; the r600 run uses a 32-bit ptrtoint directly.
5define i8 @test_atomicrmw_xchg_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
6; GCN-LABEL: @test_atomicrmw_xchg_i8_global_agent(
7; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
8; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
9; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
10; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
11; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
12; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
13; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
14; GCN-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
15; GCN-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
16; GCN-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
17; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
18; GCN:       atomicrmw.start:
19; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
20; GCN-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
21; GCN-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
22; GCN-NEXT:    [[TMP7:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] syncscope("agent") seq_cst seq_cst, align 4
23; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
24; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
25; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
26; GCN:       atomicrmw.end:
27; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
28; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
29; GCN-NEXT:    ret i8 [[EXTRACTED]]
30;
31; R600-LABEL: @test_atomicrmw_xchg_i8_global_agent(
32; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
33; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
34; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
35; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
36; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
37; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
38; R600-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
39; R600-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
40; R600-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
41; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
42; R600:       atomicrmw.start:
43; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
44; R600-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
45; R600-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
46; R600-NEXT:    [[TMP7:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] syncscope("agent") seq_cst seq_cst, align 4
47; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
48; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
49; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
50; R600:       atomicrmw.end:
51; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
52; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
53; R600-NEXT:    ret i8 [[EXTRACTED]]
54;
55  %res = atomicrmw xchg ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
56  ret i8 %res
57}
58
; i8 `atomicrmw add` on a global (addrspace 1) pointer, agent scope, seq_cst,
; with no explicit alignment. The assertions expect an i32 cmpxchg loop on the
; 4-byte-aligned word: the zero-extended operand is shifted into the byte lane,
; added to the whole loaded word, and the sum is then masked back to that lane
; before being merged with the untouched bytes of the loaded word.
; The amdgcn run computes the shift via a 64-bit ptrtoint plus trunc; the r600
; run uses a 32-bit ptrtoint.
59define i8 @test_atomicrmw_add_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
60; GCN-LABEL: @test_atomicrmw_add_i8_global_agent(
61; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
62; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
63; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
64; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
65; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
66; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
67; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
68; GCN-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
69; GCN-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
70; GCN-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
71; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
72; GCN:       atomicrmw.start:
73; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
74; GCN-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
75; GCN-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
76; GCN-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
77; GCN-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
78; GCN-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
79; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
80; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
81; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
82; GCN:       atomicrmw.end:
83; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
84; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
85; GCN-NEXT:    ret i8 [[EXTRACTED]]
86;
87; R600-LABEL: @test_atomicrmw_add_i8_global_agent(
88; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
89; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
90; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
91; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
92; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
93; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
94; R600-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
95; R600-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
96; R600-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
97; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
98; R600:       atomicrmw.start:
99; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
100; R600-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
101; R600-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
102; R600-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
103; R600-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
104; R600-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
105; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
106; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
107; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
108; R600:       atomicrmw.end:
109; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
110; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
111; R600-NEXT:    ret i8 [[EXTRACTED]]
112;
113  %res = atomicrmw add ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
114  ret i8 %res
115}
116
; Same i8 `atomicrmw add` as the unannotated case, but the input instruction
; carries an explicit `align 2`. The assertions expect the same expansion as
; without the alignment annotation: the pointer is still masked with -4, the
; shift and byte mask are still computed from the low two address bits, and
; the update is performed by an i32 cmpxchg loop on the aligned word.
117define i8 @test_atomicrmw_add_i8_global_agent_align2(ptr addrspace(1) %ptr, i8 %value) {
118; GCN-LABEL: @test_atomicrmw_add_i8_global_agent_align2(
119; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
120; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
121; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
122; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
123; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
124; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
125; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
126; GCN-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
127; GCN-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
128; GCN-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
129; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
130; GCN:       atomicrmw.start:
131; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
132; GCN-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
133; GCN-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
134; GCN-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
135; GCN-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
136; GCN-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
137; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
138; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
139; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
140; GCN:       atomicrmw.end:
141; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
142; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
143; GCN-NEXT:    ret i8 [[EXTRACTED]]
144;
145; R600-LABEL: @test_atomicrmw_add_i8_global_agent_align2(
146; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
147; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
148; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
149; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
150; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
151; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
152; R600-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
153; R600-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
154; R600-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
155; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
156; R600:       atomicrmw.start:
157; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
158; R600-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
159; R600-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
160; R600-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
161; R600-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
162; R600-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
163; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
164; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
165; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
166; R600:       atomicrmw.end:
167; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
168; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
169; R600-NEXT:    ret i8 [[EXTRACTED]]
170;
171  %res = atomicrmw add ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst, align 2
172  ret i8 %res
173}
174
; i8 `atomicrmw add` whose input instruction carries an explicit `align 4`.
; The assertions expect a simpler expansion than the unaligned cases: there is
; no ptrmask or shift computation, the original pointer is used directly for
; the i32 load and cmpxchg, and the byte lane is selected with the constants
; 255 and -256. The expected output is identical for both RUN lines, so a
; single shared assertion prefix is used.
175define i8 @test_atomicrmw_add_i8_global_agent_align4(ptr addrspace(1) %ptr, i8 %value) {
176; CHECK-LABEL: @test_atomicrmw_add_i8_global_agent_align4(
177; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[VALUE:%.*]] to i32
178; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
179; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
180; CHECK:       atomicrmw.start:
181; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
182; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
183; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[NEW]], 255
184; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -256
185; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
186; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
187; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
188; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
189; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
190; CHECK:       atomicrmw.end:
191; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i8
192; CHECK-NEXT:    ret i8 [[EXTRACTED]]
193;
194  %res = atomicrmw add ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst, align 4
195  ret i8 %res
196}
197
; i8 `atomicrmw sub` on a global (addrspace 1) pointer, agent scope, seq_cst,
; with no explicit alignment. The assertions expect the same shape as the add
; case with `sub` as the word-wide operation: the shifted operand is
; subtracted from the loaded i32, the result is masked to the byte lane and
; merged with the other bytes, and an i32 cmpxchg loop commits it.
198define i8 @test_atomicrmw_sub_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
199; GCN-LABEL: @test_atomicrmw_sub_i8_global_agent(
200; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
201; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
202; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
203; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
204; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
205; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
206; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
207; GCN-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
208; GCN-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
209; GCN-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
210; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
211; GCN:       atomicrmw.start:
212; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
213; GCN-NEXT:    [[NEW:%.*]] = sub i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
214; GCN-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
215; GCN-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
216; GCN-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
217; GCN-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
218; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
219; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
220; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
221; GCN:       atomicrmw.end:
222; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
223; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
224; GCN-NEXT:    ret i8 [[EXTRACTED]]
225;
226; R600-LABEL: @test_atomicrmw_sub_i8_global_agent(
227; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
228; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
229; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
230; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
231; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
232; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
233; R600-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
234; R600-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
235; R600-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
236; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
237; R600:       atomicrmw.start:
238; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
239; R600-NEXT:    [[NEW:%.*]] = sub i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
240; R600-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
241; R600-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
242; R600-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
243; R600-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
244; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
245; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
246; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
247; R600:       atomicrmw.end:
248; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
249; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
250; R600-NEXT:    ret i8 [[EXTRACTED]]
251;
252  %res = atomicrmw sub ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
253  ret i8 %res
254}
255
; i8 `atomicrmw and` on a global (addrspace 1) pointer, agent scope, seq_cst.
; The assertions expect no cmpxchg loop here: the operation is widened to a
; single i32 `atomicrmw and` on the 4-byte-aligned word. The shifted operand
; is first or'd with the inverted byte mask, so every bit outside the target
; byte lane is 1 in the and-operand and the neighbouring bytes are preserved.
; The old byte is then shifted out of the returned word and truncated.
256define i8 @test_atomicrmw_and_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
257; GCN-LABEL: @test_atomicrmw_and_i8_global_agent(
258; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
259; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
260; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
261; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
262; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
263; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
264; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
265; GCN-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
266; GCN-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
267; GCN-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
268; GCN-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
269; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
270; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
271; GCN-NEXT:    ret i8 [[EXTRACTED]]
272;
273; R600-LABEL: @test_atomicrmw_and_i8_global_agent(
274; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
275; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
276; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
277; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
278; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
279; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
280; R600-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
281; R600-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
282; R600-NEXT:    [[ANDOPERAND:%.*]] = or i32 [[VALOPERAND_SHIFTED]], [[INV_MASK]]
283; R600-NEXT:    [[TMP4:%.*]] = atomicrmw and ptr addrspace(1) [[ALIGNEDADDR]], i32 [[ANDOPERAND]] syncscope("agent") seq_cst, align 4
284; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[TMP2]]
285; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
286; R600-NEXT:    ret i8 [[EXTRACTED]]
287;
288  %res = atomicrmw and ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
289  ret i8 %res
290}
291
; i8 `atomicrmw nand` on a global (addrspace 1) pointer, agent scope, seq_cst.
; The assertions expect an i32 cmpxchg loop on the 4-byte-aligned word. Inside
; the loop the nand is spelled as `and` with the shifted operand followed by
; `xor ..., -1`; the result is masked to the target byte lane and merged with
; the other bytes of the loaded word before the cmpxchg.
292define i8 @test_atomicrmw_nand_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
293; GCN-LABEL: @test_atomicrmw_nand_i8_global_agent(
294; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
295; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
296; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
297; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
298; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
299; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
300; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
301; GCN-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
302; GCN-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
303; GCN-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
304; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
305; GCN:       atomicrmw.start:
306; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
307; GCN-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
308; GCN-NEXT:    [[NEW:%.*]] = xor i32 [[TMP5]], -1
309; GCN-NEXT:    [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]]
310; GCN-NEXT:    [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
311; GCN-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]]
312; GCN-NEXT:    [[TMP9:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] syncscope("agent") seq_cst seq_cst, align 4
313; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1
314; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0
315; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
316; GCN:       atomicrmw.end:
317; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
318; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
319; GCN-NEXT:    ret i8 [[EXTRACTED]]
320;
321; R600-LABEL: @test_atomicrmw_nand_i8_global_agent(
322; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
323; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
324; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
325; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
326; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
327; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
328; R600-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
329; R600-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
330; R600-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
331; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
332; R600:       atomicrmw.start:
333; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
334; R600-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
335; R600-NEXT:    [[NEW:%.*]] = xor i32 [[TMP5]], -1
336; R600-NEXT:    [[TMP6:%.*]] = and i32 [[NEW]], [[MASK]]
337; R600-NEXT:    [[TMP7:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
338; R600-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP6]]
339; R600-NEXT:    [[TMP9:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP8]] syncscope("agent") seq_cst seq_cst, align 4
340; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP9]], 1
341; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP9]], 0
342; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
343; R600:       atomicrmw.end:
344; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
345; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
346; R600-NEXT:    ret i8 [[EXTRACTED]]
347;
348  %res = atomicrmw nand ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
349  ret i8 %res
350}
351
; i8 `atomicrmw or` on a global (addrspace 1) pointer, agent scope, seq_cst.
; The assertions expect no cmpxchg loop: the operation becomes a single i32
; `atomicrmw or` on the 4-byte-aligned word, using the zero-extended operand
; shifted into the target byte lane. Bits outside that lane are 0 in the
; operand, so the neighbouring bytes are left unchanged by the or.
; The inverted mask is still computed in the expected output but is not used
; by any later instruction in this expansion.
352define i8 @test_atomicrmw_or_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
353; GCN-LABEL: @test_atomicrmw_or_i8_global_agent(
354; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
355; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
356; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
357; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
358; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
359; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
360; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
361; GCN-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
362; GCN-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
363; GCN-NEXT:    [[TMP4:%.*]] = atomicrmw or ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] syncscope("agent") seq_cst, align 4
364; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
365; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
366; GCN-NEXT:    ret i8 [[EXTRACTED]]
367;
368; R600-LABEL: @test_atomicrmw_or_i8_global_agent(
369; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
370; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
371; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
372; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
373; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
374; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
375; R600-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
376; R600-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
377; R600-NEXT:    [[TMP4:%.*]] = atomicrmw or ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] syncscope("agent") seq_cst, align 4
378; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[TMP2]]
379; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
380; R600-NEXT:    ret i8 [[EXTRACTED]]
381;
382  %res = atomicrmw or ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
383  ret i8 %res
384}
385
; i8 `atomicrmw xor` on a global (addrspace 1) pointer, agent scope, seq_cst.
; The assertions expect the same loop-free shape as the or case: a single i32
; `atomicrmw xor` on the 4-byte-aligned word with the zero-extended operand
; shifted into the target byte lane. Bits outside that lane are 0 in the
; operand, so the xor leaves the neighbouring bytes unchanged. The old byte is
; recovered by shifting the returned word right and truncating to i8.
386define i8 @test_atomicrmw_xor_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
387; GCN-LABEL: @test_atomicrmw_xor_i8_global_agent(
388; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
389; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
390; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
391; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
392; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
393; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
394; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
395; GCN-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
396; GCN-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
397; GCN-NEXT:    [[TMP4:%.*]] = atomicrmw xor ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] syncscope("agent") seq_cst, align 4
398; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[SHIFTAMT]]
399; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
400; GCN-NEXT:    ret i8 [[EXTRACTED]]
401;
402; R600-LABEL: @test_atomicrmw_xor_i8_global_agent(
403; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
404; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
405; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
406; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
407; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
408; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
409; R600-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
410; R600-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
411; R600-NEXT:    [[TMP4:%.*]] = atomicrmw xor ptr addrspace(1) [[ALIGNEDADDR]], i32 [[VALOPERAND_SHIFTED]] syncscope("agent") seq_cst, align 4
412; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP4]], [[TMP2]]
413; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
414; R600-NEXT:    ret i8 [[EXTRACTED]]
415;
416  %res = atomicrmw xor ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
417  ret i8 %res
418}
419
; i8 `atomicrmw max` (signed) on a global (addrspace 1) pointer, agent scope,
; seq_cst. The assertions expect an i32 cmpxchg loop in which the comparison
; is done at i8 width rather than on the widened word: the current byte is
; shifted out of the loaded word and truncated, compared against the operand
; with `icmp sgt`, and the larger value is chosen with `select`. The chosen
; byte is then zero-extended, shifted back into its lane with `shl nuw`, and
; inserted into the loaded word with the lane cleared by the inverted mask.
; Unlike the add/sub cases, the operand is not pre-shifted before the loop.
420define i8 @test_atomicrmw_max_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
421; GCN-LABEL: @test_atomicrmw_max_i8_global_agent(
422; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
423; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
424; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
425; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
426; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
427; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
428; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
429; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
430; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
431; GCN:       atomicrmw.start:
432; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
433; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
434; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
435; GCN-NEXT:    [[TMP4:%.*]] = icmp sgt i8 [[EXTRACTED]], [[VALUE:%.*]]
436; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]]
437; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
438; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
439; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
440; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
441; GCN-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
442; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
443; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
444; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
445; GCN:       atomicrmw.end:
446; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
447; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
448; GCN-NEXT:    ret i8 [[EXTRACTED3]]
449;
450; R600-LABEL: @test_atomicrmw_max_i8_global_agent(
451; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
452; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
453; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
454; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
455; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
456; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
457; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
458; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
459; R600:       atomicrmw.start:
460; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
461; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
462; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
463; R600-NEXT:    [[TMP4:%.*]] = icmp sgt i8 [[EXTRACTED]], [[VALUE:%.*]]
464; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]]
465; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
466; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
467; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
468; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
469; R600-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
470; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
471; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
472; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
473; R600:       atomicrmw.end:
474; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
475; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
476; R600-NEXT:    ret i8 [[EXTRACTED3]]
477;
478  %res = atomicrmw max ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
479  ret i8 %res
480}
481
; Signed i8 `atomicrmw min` on an addrspace(1) pointer with syncscope("agent").
; The expected expansion is a cmpxchg loop on the enclosing 4-byte-aligned i32
; word: the byte lane is extracted with lshr+trunc, combined with %value via
; `icmp sle` + select, then re-inserted under INV_MASK. The GCN and R600
; expectations differ only in the pointer index width (i64 plus a trunc of the
; shift amount for amdgcn, plain i32 for r600).
482define i8 @test_atomicrmw_min_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
483; GCN-LABEL: @test_atomicrmw_min_i8_global_agent(
484; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
485; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
486; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
487; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
488; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
489; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
490; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
491; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
492; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
493; GCN:       atomicrmw.start:
494; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
495; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
496; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
497; GCN-NEXT:    [[TMP4:%.*]] = icmp sle i8 [[EXTRACTED]], [[VALUE:%.*]]
498; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]]
499; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
500; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
501; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
502; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
503; GCN-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
504; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
505; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
506; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
507; GCN:       atomicrmw.end:
508; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
509; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
510; GCN-NEXT:    ret i8 [[EXTRACTED3]]
511;
512; R600-LABEL: @test_atomicrmw_min_i8_global_agent(
513; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
514; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
515; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
516; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
517; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
518; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
519; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
520; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
521; R600:       atomicrmw.start:
522; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
523; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
524; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
525; R600-NEXT:    [[TMP4:%.*]] = icmp sle i8 [[EXTRACTED]], [[VALUE:%.*]]
526; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]]
527; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
528; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
529; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
530; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
531; R600-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
532; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
533; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
534; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
535; R600:       atomicrmw.end:
536; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
537; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
538; R600-NEXT:    ret i8 [[EXTRACTED3]]
539;
540  %res = atomicrmw min ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
541  ret i8 %res
542}
543
; Unsigned i8 `atomicrmw umax` on an addrspace(1) pointer with
; syncscope("agent"). The expected expansion is a cmpxchg loop on the
; enclosing aligned i32 word; the new byte is chosen with `icmp ugt` + select
; (keep the loaded byte when it is strictly greater, otherwise take %value)
; and merged back into the word under INV_MASK. The old byte is returned by
; shifting and truncating the final loaded word.
544define i8 @test_atomicrmw_umax_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
545; GCN-LABEL: @test_atomicrmw_umax_i8_global_agent(
546; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
547; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
548; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
549; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
550; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
551; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
552; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
553; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
554; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
555; GCN:       atomicrmw.start:
556; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
557; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
558; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
559; GCN-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
560; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]]
561; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
562; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
563; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
564; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
565; GCN-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
566; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
567; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
568; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
569; GCN:       atomicrmw.end:
570; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
571; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
572; GCN-NEXT:    ret i8 [[EXTRACTED3]]
573;
574; R600-LABEL: @test_atomicrmw_umax_i8_global_agent(
575; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
576; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
577; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
578; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
579; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
580; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
581; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
582; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
583; R600:       atomicrmw.start:
584; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
585; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
586; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
587; R600-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
588; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]]
589; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
590; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
591; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
592; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
593; R600-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
594; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
595; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
596; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
597; R600:       atomicrmw.end:
598; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
599; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
600; R600-NEXT:    ret i8 [[EXTRACTED3]]
601;
602  %res = atomicrmw umax ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
603  ret i8 %res
604}
605
; Unsigned i8 `atomicrmw umin` on an addrspace(1) pointer with
; syncscope("agent"). Same expected cmpxchg-loop shape as the other min/max
; tests in this file, but the byte is selected with `icmp ule` + select (keep
; the loaded byte when it is less than or equal to %value, otherwise take
; %value). The cmpxchg on the widened i32 word keeps the "agent" syncscope
; and seq_cst ordering of the original instruction.
606define i8 @test_atomicrmw_umin_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
607; GCN-LABEL: @test_atomicrmw_umin_i8_global_agent(
608; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
609; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
610; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
611; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
612; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
613; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
614; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
615; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
616; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
617; GCN:       atomicrmw.start:
618; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
619; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
620; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
621; GCN-NEXT:    [[TMP4:%.*]] = icmp ule i8 [[EXTRACTED]], [[VALUE:%.*]]
622; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]]
623; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
624; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
625; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
626; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
627; GCN-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
628; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
629; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
630; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
631; GCN:       atomicrmw.end:
632; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
633; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
634; GCN-NEXT:    ret i8 [[EXTRACTED3]]
635;
636; R600-LABEL: @test_atomicrmw_umin_i8_global_agent(
637; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
638; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
639; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
640; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
641; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
642; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
643; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
644; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
645; R600:       atomicrmw.start:
646; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
647; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
648; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
649; R600-NEXT:    [[TMP4:%.*]] = icmp ule i8 [[EXTRACTED]], [[VALUE:%.*]]
650; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP4]], i8 [[EXTRACTED]], i8 [[VALUE]]
651; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
652; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
653; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
654; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
655; R600-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
656; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
657; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
658; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
659; R600:       atomicrmw.end:
660; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
661; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
662; R600-NEXT:    ret i8 [[EXTRACTED3]]
663;
664  %res = atomicrmw umin ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
665  ret i8 %res
666}
667
; i8 `cmpxchg` at %out + 4 in addrspace(1) with syncscope("agent").
; The expected expansion is the partword cmpxchg sequence: both the new value
; (%in) and the comparand (%old) are zero-extended and shifted into the byte
; lane, then OR-ed with the other bytes of the word before an i32 cmpxchg.
; On failure the loop retries only when the bytes outside the lane changed
; (icmp ne on the INV_MASK-ed words); otherwise it falls through to the end
; block. The { i8, i1 } result is rebuilt with insertvalue and field 0 is
; returned.
668define i8 @test_cmpxchg_i8_global_agent(ptr addrspace(1) %out, i8 %in, i8 %old) {
669; GCN-LABEL: @test_cmpxchg_i8_global_agent(
670; GCN-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[OUT:%.*]], i64 4
671; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[GEP]], i64 -4)
672; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i64
673; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
674; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
675; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
676; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
677; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
678; GCN-NEXT:    [[TMP3:%.*]] = zext i8 [[IN:%.*]] to i32
679; GCN-NEXT:    [[TMP4:%.*]] = shl i32 [[TMP3]], [[SHIFTAMT]]
680; GCN-NEXT:    [[TMP5:%.*]] = zext i8 [[OLD:%.*]] to i32
681; GCN-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP5]], [[SHIFTAMT]]
682; GCN-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
683; GCN-NEXT:    [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]]
684; GCN-NEXT:    br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
685; GCN:       partword.cmpxchg.loop:
686; GCN-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
687; GCN-NEXT:    [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]]
688; GCN-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]]
689; GCN-NEXT:    [[TMP12:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] syncscope("agent") seq_cst seq_cst, align 4
690; GCN-NEXT:    [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0
691; GCN-NEXT:    [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1
692; GCN-NEXT:    br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
693; GCN:       partword.cmpxchg.failure:
694; GCN-NEXT:    [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]]
695; GCN-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]]
696; GCN-NEXT:    br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
697; GCN:       partword.cmpxchg.end:
698; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[SHIFTAMT]]
699; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
700; GCN-NEXT:    [[TMP17:%.*]] = insertvalue { i8, i1 } poison, i8 [[EXTRACTED]], 0
701; GCN-NEXT:    [[TMP18:%.*]] = insertvalue { i8, i1 } [[TMP17]], i1 [[TMP14]], 1
702; GCN-NEXT:    [[EXTRACT:%.*]] = extractvalue { i8, i1 } [[TMP18]], 0
703; GCN-NEXT:    ret i8 [[EXTRACT]]
704;
705; R600-LABEL: @test_cmpxchg_i8_global_agent(
706; R600-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr addrspace(1) [[OUT:%.*]], i64 4
707; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[GEP]], i32 -4)
708; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[GEP]] to i32
709; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
710; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
711; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
712; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
713; R600-NEXT:    [[TMP3:%.*]] = zext i8 [[IN:%.*]] to i32
714; R600-NEXT:    [[TMP4:%.*]] = shl i32 [[TMP3]], [[TMP2]]
715; R600-NEXT:    [[TMP5:%.*]] = zext i8 [[OLD:%.*]] to i32
716; R600-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP5]], [[TMP2]]
717; R600-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
718; R600-NEXT:    [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]]
719; R600-NEXT:    br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
720; R600:       partword.cmpxchg.loop:
721; R600-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
722; R600-NEXT:    [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]]
723; R600-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]]
724; R600-NEXT:    [[TMP12:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] syncscope("agent") seq_cst seq_cst, align 4
725; R600-NEXT:    [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0
726; R600-NEXT:    [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1
727; R600-NEXT:    br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
728; R600:       partword.cmpxchg.failure:
729; R600-NEXT:    [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]]
730; R600-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]]
731; R600-NEXT:    br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
732; R600:       partword.cmpxchg.end:
733; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[TMP2]]
734; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
735; R600-NEXT:    [[TMP17:%.*]] = insertvalue { i8, i1 } poison, i8 [[EXTRACTED]], 0
736; R600-NEXT:    [[TMP18:%.*]] = insertvalue { i8, i1 } [[TMP17]], i1 [[TMP14]], 1
737; R600-NEXT:    [[EXTRACT:%.*]] = extractvalue { i8, i1 } [[TMP18]], 0
738; R600-NEXT:    ret i8 [[EXTRACT]]
739;
740  %gep = getelementptr i8, ptr addrspace(1) %out, i64 4
741  %res = cmpxchg ptr addrspace(1) %gep, i8 %old, i8 %in syncscope("agent") seq_cst seq_cst
742  %extract = extractvalue {i8, i1} %res, 0
743  ret i8 %extract
744}
745
; i8 `cmpxchg` at %out + 4 in addrspace(3), declared `align 2`, with no
; syncscope. Both RUN lines share one set of expectations here (common CHECK
; prefix) because the addrspace(3) pointer is masked with a 32-bit ptrmask
; under either triple. Align 2 does not remove the ptrmask/shift sequence:
; the expected output is still the full partword cmpxchg loop on the
; enclosing i32 word, and the widened cmpxchg carries no syncscope.
746define i8 @test_cmpxchg_i8_local_align2(ptr addrspace(3) %out, i8 %in, i8 %old) {
747; CHECK-LABEL: @test_cmpxchg_i8_local_align2(
748; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr addrspace(3) [[OUT:%.*]], i64 4
749; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[GEP]], i32 -4)
750; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[GEP]] to i32
751; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
752; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
753; CHECK-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
754; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
755; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[IN:%.*]] to i32
756; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[TMP3]], [[TMP2]]
757; CHECK-NEXT:    [[TMP5:%.*]] = zext i8 [[OLD:%.*]] to i32
758; CHECK-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP5]], [[TMP2]]
759; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
760; CHECK-NEXT:    [[TMP8:%.*]] = and i32 [[TMP7]], [[INV_MASK]]
761; CHECK-NEXT:    br label [[PARTWORD_CMPXCHG_LOOP:%.*]]
762; CHECK:       partword.cmpxchg.loop:
763; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ [[TMP8]], [[TMP0:%.*]] ], [ [[TMP15:%.*]], [[PARTWORD_CMPXCHG_FAILURE:%.*]] ]
764; CHECK-NEXT:    [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP4]]
765; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP6]]
766; CHECK-NEXT:    [[TMP12:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[TMP11]], i32 [[TMP10]] seq_cst seq_cst, align 4
767; CHECK-NEXT:    [[TMP13:%.*]] = extractvalue { i32, i1 } [[TMP12]], 0
768; CHECK-NEXT:    [[TMP14:%.*]] = extractvalue { i32, i1 } [[TMP12]], 1
769; CHECK-NEXT:    br i1 [[TMP14]], label [[PARTWORD_CMPXCHG_END:%.*]], label [[PARTWORD_CMPXCHG_FAILURE]]
770; CHECK:       partword.cmpxchg.failure:
771; CHECK-NEXT:    [[TMP15]] = and i32 [[TMP13]], [[INV_MASK]]
772; CHECK-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP9]], [[TMP15]]
773; CHECK-NEXT:    br i1 [[TMP16]], label [[PARTWORD_CMPXCHG_LOOP]], label [[PARTWORD_CMPXCHG_END]]
774; CHECK:       partword.cmpxchg.end:
775; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[TMP13]], [[TMP2]]
776; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
777; CHECK-NEXT:    [[TMP17:%.*]] = insertvalue { i8, i1 } poison, i8 [[EXTRACTED]], 0
778; CHECK-NEXT:    [[TMP18:%.*]] = insertvalue { i8, i1 } [[TMP17]], i1 [[TMP14]], 1
779; CHECK-NEXT:    [[EXTRACT:%.*]] = extractvalue { i8, i1 } [[TMP18]], 0
780; CHECK-NEXT:    ret i8 [[EXTRACT]]
781;
782  %gep = getelementptr i8, ptr addrspace(3) %out, i64 4
783  %res = cmpxchg ptr addrspace(3) %gep, i8 %old, i8 %in seq_cst seq_cst, align 2
784  %extract = extractvalue {i8, i1} %res, 0
785  ret i8 %extract
786}
787
; i8 `atomicrmw uinc_wrap` on an addrspace(1) pointer with syncscope("agent")
; and no explicit alignment. The expected expansion is a cmpxchg loop on the
; enclosing aligned i32 word. Inside the loop the new byte is computed as
;   new = (old u>= %value) ? 0 : old + 1
; (add, icmp uge, select) and merged back into the word under INV_MASK.
788define i8 @test_atomicrmw_inc_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
789; GCN-LABEL: @test_atomicrmw_inc_i8_global_agent(
790; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
791; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
792; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
793; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
794; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
795; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
796; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
797; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
798; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
799; GCN:       atomicrmw.start:
800; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
801; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
802; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
803; GCN-NEXT:    [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1
804; GCN-NEXT:    [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
805; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]]
806; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
807; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
808; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
809; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
810; GCN-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
811; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
812; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
813; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
814; GCN:       atomicrmw.end:
815; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
816; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
817; GCN-NEXT:    ret i8 [[EXTRACTED3]]
818;
819; R600-LABEL: @test_atomicrmw_inc_i8_global_agent(
820; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
821; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
822; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
823; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
824; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
825; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
826; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
827; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
828; R600:       atomicrmw.start:
829; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
830; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
831; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
832; R600-NEXT:    [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1
833; R600-NEXT:    [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
834; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]]
835; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
836; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
837; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
838; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
839; R600-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
840; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
841; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
842; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
843; R600:       atomicrmw.end:
844; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
845; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
846; R600-NEXT:    ret i8 [[EXTRACTED3]]
847;
848  %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
849  ret i8 %res
850}
851
; Same operation as @test_atomicrmw_inc_i8_global_agent but with an explicit
; `align 2` on the atomicrmw. The expectations show that 2-byte alignment
; changes nothing in the expansion: the pointer is still rounded down with
; ptrmask(-4), the byte offset is still taken from the low two address bits
; (and ..., 3), and the loop still runs an i32 cmpxchg with align 4.
852define i8 @test_atomicrmw_inc_i8_global_agent_align2(ptr addrspace(1) %ptr, i8 %value) {
853; GCN-LABEL: @test_atomicrmw_inc_i8_global_agent_align2(
854; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
855; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
856; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
857; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
858; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
859; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
860; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
861; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
862; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
863; GCN:       atomicrmw.start:
864; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
865; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
866; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
867; GCN-NEXT:    [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1
868; GCN-NEXT:    [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
869; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]]
870; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
871; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
872; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
873; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
874; GCN-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
875; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
876; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
877; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
878; GCN:       atomicrmw.end:
879; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
880; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
881; GCN-NEXT:    ret i8 [[EXTRACTED3]]
882;
883; R600-LABEL: @test_atomicrmw_inc_i8_global_agent_align2(
884; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
885; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
886; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
887; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
888; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
889; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
890; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
891; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
892; R600:       atomicrmw.start:
893; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
894; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
895; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
896; R600-NEXT:    [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1
897; R600-NEXT:    [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
898; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]]
899; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
900; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
901; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
902; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
903; R600-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
904; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
905; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
906; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
907; R600:       atomicrmw.end:
908; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
909; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
910; R600-NEXT:    ret i8 [[EXTRACTED3]]
911;
912  %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst, align 2
913  ret i8 %res
914}
915
; i8 `atomicrmw uinc_wrap` on an addrspace(1) pointer declared `align 4`.
; With 4-byte alignment the expected expansion uses %ptr directly as the i32
; word address: there is no ptrmask, no shift amount and no computed mask.
; The byte is taken with a plain trunc and re-inserted with the constant mask
; -256 (clear the low 8 bits). Because no pointer-width arithmetic remains,
; both RUN lines share the common CHECK prefix.
916define i8 @test_atomicrmw_inc_i8_global_agent_align4(ptr addrspace(1) %ptr, i8 %value) {
917; CHECK-LABEL: @test_atomicrmw_inc_i8_global_agent_align4(
918; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
919; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
920; CHECK:       atomicrmw.start:
921; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
922; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8
923; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[EXTRACTED]], 1
924; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
925; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP3]], i8 0, i8 [[TMP2]]
926; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
927; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -256
928; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
929; CHECK-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
930; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
931; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
932; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
933; CHECK:       atomicrmw.end:
934; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8
935; CHECK-NEXT:    ret i8 [[EXTRACTED1]]
936;
937  %res = atomicrmw uinc_wrap ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst, align 4
938  ret i8 %res
939}
940
; i8 `atomicrmw uinc_wrap` on an addrspace(3) pointer with no syncscope and no
; explicit alignment. The expected expansion is the masked cmpxchg loop on the
; enclosing aligned i32 word, using 32-bit ptrmask/ptrtoint for addrspace(3)
; under both triples (hence the common CHECK prefix). The widened cmpxchg has
; no syncscope, matching the source instruction.
941define i8 @test_atomicrmw_inc_i8_local(ptr addrspace(3) %ptr, i8 %value) {
942; CHECK-LABEL: @test_atomicrmw_inc_i8_local(
943; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
944; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
945; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
946; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
947; CHECK-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
948; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
949; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
950; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
951; CHECK:       atomicrmw.start:
952; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
953; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
954; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
955; CHECK-NEXT:    [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1
956; CHECK-NEXT:    [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
957; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]]
958; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
959; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
960; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
961; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
962; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
963; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
964; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
965; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
966; CHECK:       atomicrmw.end:
967; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
968; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
969; CHECK-NEXT:    ret i8 [[EXTRACTED3]]
970;
971  %res = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i8 %value seq_cst
972  ret i8 %res
973}
974
; Same operation as @test_atomicrmw_inc_i8_local but with an explicit
; `align 2`. The expectations are line-for-line the same as in the
; default-alignment test: 2-byte alignment still requires the ptrmask(-4)
; rounding, the variable shift/mask, and the i32 cmpxchg loop.
975define i8 @test_atomicrmw_inc_i8_local_align2(ptr addrspace(3) %ptr, i8 %value) {
976; CHECK-LABEL: @test_atomicrmw_inc_i8_local_align2(
977; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
978; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
979; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
980; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
981; CHECK-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
982; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
983; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
984; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
985; CHECK:       atomicrmw.start:
986; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
987; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
988; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
989; CHECK-NEXT:    [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1
990; CHECK-NEXT:    [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
991; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]]
992; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
993; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
994; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
995; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
996; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
997; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
998; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
999; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1000; CHECK:       atomicrmw.end:
1001; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1002; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1003; CHECK-NEXT:    ret i8 [[EXTRACTED3]]
1004;
1005  %res = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i8 %value seq_cst, align 2
1006  ret i8 %res
1007}
1008
; i8 uinc_wrap on an addrspace(3) pointer with an explicit "align 4".
; The expected output uses the original pointer directly: no ptrmask and no
; shift; the byte is the low 8 bits of the word and is merged back with the
; constant mask -256. Both RUN lines expect the same output.
1009define i8 @test_atomicrmw_inc_i8_local_align4(ptr addrspace(3) %ptr, i8 %value) {
1010; CHECK-LABEL: @test_atomicrmw_inc_i8_local_align4(
1011; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(3) [[PTR:%.*]], align 4
1012; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1013; CHECK:       atomicrmw.start:
1014; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1015; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8
1016; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[EXTRACTED]], 1
1017; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
1018; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP3]], i8 0, i8 [[TMP2]]
1019; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1020; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -256
1021; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
1022; CHECK-NEXT:    [[TMP4:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
1023; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1024; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
1025; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1026; CHECK:       atomicrmw.end:
1027; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8
1028; CHECK-NEXT:    ret i8 [[EXTRACTED1]]
1029;
1030  %res = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i8 %value seq_cst, align 4
1031  ret i8 %res
1032}
1033
; i8 uinc_wrap on a default-address-space pointer with syncscope("agent") and
; no explicit alignment. The two RUN lines differ only in pointer-integer width:
; the amdgcn run computes the byte offset in i64 and truncates the shift amount
; to i32, while the r600 run does the whole computation in i32. The
; syncscope("agent") is expected to be carried onto the widened cmpxchg.
1034define i8 @test_atomicrmw_inc_i8_flat_agent(ptr %ptr, i8 %value) {
1035; GCN-LABEL: @test_atomicrmw_inc_i8_flat_agent(
1036; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
1037; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
1038; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
1039; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
1040; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
1041; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
1042; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1043; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
1044; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
1045; GCN:       atomicrmw.start:
1046; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1047; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
1048; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1049; GCN-NEXT:    [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1
1050; GCN-NEXT:    [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
1051; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]]
1052; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1053; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
1054; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1055; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1056; GCN-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1057; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1058; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1059; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1060; GCN:       atomicrmw.end:
1061; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
1062; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1063; GCN-NEXT:    ret i8 [[EXTRACTED3]]
1064;
1065; R600-LABEL: @test_atomicrmw_inc_i8_flat_agent(
1066; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[PTR:%.*]], i32 -4)
1067; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i32
1068; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1069; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1070; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
1071; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1072; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
1073; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
1074; R600:       atomicrmw.start:
1075; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1076; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
1077; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1078; R600-NEXT:    [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1
1079; R600-NEXT:    [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
1080; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]]
1081; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1082; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
1083; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1084; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1085; R600-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1086; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1087; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1088; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1089; R600:       atomicrmw.end:
1090; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1091; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1092; R600-NEXT:    ret i8 [[EXTRACTED3]]
1093;
1094  %res = atomicrmw uinc_wrap ptr %ptr, i8 %value syncscope("agent") seq_cst
1095  ret i8 %res
1096}
1097
; i8 uinc_wrap on a default-address-space pointer with syncscope("agent") and
; an explicit "align 2". The expected expansion still masks the pointer down to
; a 4-byte boundary and shifts by 8 * (ptr & 3). The amdgcn run does the offset
; math in i64 (then truncates); the r600 run does it in i32.
1098define i8 @test_atomicrmw_inc_i8_flat_agent_align2(ptr %ptr, i8 %value) {
1099; GCN-LABEL: @test_atomicrmw_inc_i8_flat_agent_align2(
1100; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
1101; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
1102; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
1103; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
1104; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
1105; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
1106; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1107; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
1108; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
1109; GCN:       atomicrmw.start:
1110; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1111; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
1112; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1113; GCN-NEXT:    [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1
1114; GCN-NEXT:    [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
1115; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]]
1116; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1117; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
1118; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1119; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1120; GCN-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1121; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1122; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1123; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1124; GCN:       atomicrmw.end:
1125; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
1126; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1127; GCN-NEXT:    ret i8 [[EXTRACTED3]]
1128;
1129; R600-LABEL: @test_atomicrmw_inc_i8_flat_agent_align2(
1130; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[PTR:%.*]], i32 -4)
1131; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i32
1132; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1133; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1134; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
1135; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1136; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
1137; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
1138; R600:       atomicrmw.start:
1139; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1140; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
1141; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1142; R600-NEXT:    [[TMP4:%.*]] = add i8 [[EXTRACTED]], 1
1143; R600-NEXT:    [[TMP5:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
1144; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 0, i8 [[TMP4]]
1145; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1146; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
1147; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1148; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1149; R600-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1150; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1151; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1152; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1153; R600:       atomicrmw.end:
1154; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1155; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1156; R600-NEXT:    ret i8 [[EXTRACTED3]]
1157;
1158  %res = atomicrmw uinc_wrap ptr %ptr, i8 %value syncscope("agent") seq_cst, align 2
1159  ret i8 %res
1160}
1161
; i8 uinc_wrap on a default-address-space pointer with syncscope("agent") and
; an explicit "align 4". The expected output has no pointer-width-dependent
; arithmetic, so one shared set of assertions covers both RUN lines: the
; original pointer is used directly and the byte is merged with the constant
; mask -256.
1162define i8 @test_atomicrmw_inc_i8_flat_agent_align4(ptr %ptr, i8 %value) {
1163; CHECK-LABEL: @test_atomicrmw_inc_i8_flat_agent_align4(
1164; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
1165; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1166; CHECK:       atomicrmw.start:
1167; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1168; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8
1169; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[EXTRACTED]], 1
1170; CHECK-NEXT:    [[TMP3:%.*]] = icmp uge i8 [[EXTRACTED]], [[VALUE:%.*]]
1171; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP3]], i8 0, i8 [[TMP2]]
1172; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1173; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -256
1174; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
1175; CHECK-NEXT:    [[TMP4:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1176; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
1177; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP4]], 0
1178; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1179; CHECK:       atomicrmw.end:
1180; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8
1181; CHECK-NEXT:    ret i8 [[EXTRACTED1]]
1182;
1183  %res = atomicrmw uinc_wrap ptr %ptr, i8 %value syncscope("agent") seq_cst, align 4
1184  ret i8 %res
1185}
1186
; i8 udec_wrap on an addrspace(1) pointer with syncscope("agent") and no
; explicit alignment. Inside the 32-bit cmpxchg loop the new byte is
; "old - 1", replaced by %value when old == 0 or old > %value (unsigned).
; The amdgcn run computes the byte offset in i64 and truncates the shift
; amount; the r600 run computes it in i32.
1187define i8 @test_atomicrmw_dec_i8_global_agent(ptr addrspace(1) %ptr, i8 %value) {
1188; GCN-LABEL: @test_atomicrmw_dec_i8_global_agent(
1189; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
1190; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
1191; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
1192; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
1193; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
1194; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
1195; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1196; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
1197; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
1198; GCN:       atomicrmw.start:
1199; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1200; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
1201; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1202; GCN-NEXT:    [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1
1203; GCN-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1204; GCN-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1205; GCN-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
1206; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]]
1207; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1208; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
1209; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1210; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1211; GCN-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1212; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1213; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1214; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1215; GCN:       atomicrmw.end:
1216; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
1217; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1218; GCN-NEXT:    ret i8 [[EXTRACTED3]]
1219;
1220; R600-LABEL: @test_atomicrmw_dec_i8_global_agent(
1221; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
1222; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
1223; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1224; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1225; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
1226; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1227; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
1228; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
1229; R600:       atomicrmw.start:
1230; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1231; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
1232; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1233; R600-NEXT:    [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1
1234; R600-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1235; R600-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1236; R600-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
1237; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]]
1238; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1239; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
1240; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1241; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1242; R600-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1243; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1244; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1245; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1246; R600:       atomicrmw.end:
1247; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1248; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1249; R600-NEXT:    ret i8 [[EXTRACTED3]]
1250;
1251  %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst
1252  ret i8 %res
1253}
1254
; i8 udec_wrap on an addrspace(1) pointer with syncscope("agent") and an
; explicit "align 2". The expected expansion still uses the ptrmask-aligned
; word plus a variable shift of 8 * (ptr & 3). The amdgcn run does the offset
; math in i64 (then truncates); the r600 run does it in i32.
1255define i8 @test_atomicrmw_dec_i8_global_agent_align2(ptr addrspace(1) %ptr, i8 %value) {
1256; GCN-LABEL: @test_atomicrmw_dec_i8_global_agent_align2(
1257; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) [[PTR:%.*]], i64 -4)
1258; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64
1259; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
1260; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
1261; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
1262; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
1263; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1264; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
1265; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
1266; GCN:       atomicrmw.start:
1267; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1268; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
1269; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1270; GCN-NEXT:    [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1
1271; GCN-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1272; GCN-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1273; GCN-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
1274; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]]
1275; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1276; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
1277; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1278; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1279; GCN-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1280; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1281; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1282; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1283; GCN:       atomicrmw.end:
1284; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
1285; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1286; GCN-NEXT:    ret i8 [[EXTRACTED3]]
1287;
1288; R600-LABEL: @test_atomicrmw_dec_i8_global_agent_align2(
1289; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR:%.*]], i32 -4)
1290; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32
1291; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1292; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1293; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
1294; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1295; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(1) [[ALIGNEDADDR]], align 4
1296; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
1297; R600:       atomicrmw.start:
1298; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1299; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
1300; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1301; R600-NEXT:    [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1
1302; R600-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1303; R600-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1304; R600-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
1305; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]]
1306; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1307; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
1308; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1309; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1310; R600-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(1) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1311; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1312; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1313; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1314; R600:       atomicrmw.end:
1315; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1316; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1317; R600-NEXT:    ret i8 [[EXTRACTED3]]
1318;
1319  %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst, align 2
1320  ret i8 %res
1321}
1322
; i8 udec_wrap on an addrspace(1) pointer with syncscope("agent") and an
; explicit "align 4". The expected output operates on the original pointer with
; no ptrmask or shift, and merges the new byte using the constant mask -256.
; One shared set of assertions covers both RUN lines.
1323define i8 @test_atomicrmw_dec_i8_global_agent_align4(ptr addrspace(1) %ptr, i8 %value) {
1324; CHECK-LABEL: @test_atomicrmw_dec_i8_global_agent_align4(
1325; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(1) [[PTR:%.*]], align 4
1326; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1327; CHECK:       atomicrmw.start:
1328; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1329; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8
1330; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 [[EXTRACTED]], 1
1331; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1332; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1333; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
1334; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 [[VALUE]], i8 [[TMP2]]
1335; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1336; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -256
1337; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
1338; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(1) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1339; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1340; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1341; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1342; CHECK:       atomicrmw.end:
1343; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8
1344; CHECK-NEXT:    ret i8 [[EXTRACTED1]]
1345;
1346  %res = atomicrmw udec_wrap ptr addrspace(1) %ptr, i8 %value syncscope("agent") seq_cst, align 4
1347  ret i8 %res
1348}
1349
; i8 udec_wrap on an addrspace(3) pointer with no syncscope and no explicit
; alignment. Both RUN lines expect identical output with all pointer math in
; i32: a 32-bit cmpxchg loop on the ptrmask-aligned word, where the new byte is
; "old - 1", or %value when old == 0 or old > %value (unsigned).
1350define i8 @test_atomicrmw_dec_i8_local(ptr addrspace(3) %ptr, i8 %value) {
1351; CHECK-LABEL: @test_atomicrmw_dec_i8_local(
1352; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
1353; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
1354; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1355; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1356; CHECK-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
1357; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1358; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
1359; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1360; CHECK:       atomicrmw.start:
1361; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1362; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
1363; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1364; CHECK-NEXT:    [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1
1365; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1366; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1367; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
1368; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]]
1369; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1370; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
1371; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1372; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1373; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
1374; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1375; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1376; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1377; CHECK:       atomicrmw.end:
1378; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1379; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1380; CHECK-NEXT:    ret i8 [[EXTRACTED3]]
1381;
1382  %res = atomicrmw udec_wrap ptr addrspace(3) %ptr, i8 %value seq_cst
1383  ret i8 %res
1384}
1385
; i8 udec_wrap on an addrspace(3) pointer with an explicit "align 2".
; Both RUN lines expect identical output: the ptrmask-aligned word and a
; variable shift of 8 * (ptr & 3) are still used; align 2 does not remove them.
1386define i8 @test_atomicrmw_dec_i8_local_align2(ptr addrspace(3) %ptr, i8 %value) {
1387; CHECK-LABEL: @test_atomicrmw_dec_i8_local_align2(
1388; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[PTR:%.*]], i32 -4)
1389; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[PTR]] to i32
1390; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1391; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1392; CHECK-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
1393; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1394; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(3) [[ALIGNEDADDR]], align 4
1395; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1396; CHECK:       atomicrmw.start:
1397; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1398; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
1399; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1400; CHECK-NEXT:    [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1
1401; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1402; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1403; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
1404; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]]
1405; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1406; CHECK-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
1407; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1408; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1409; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(3) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
1410; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1411; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1412; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1413; CHECK:       atomicrmw.end:
1414; CHECK-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1415; CHECK-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1416; CHECK-NEXT:    ret i8 [[EXTRACTED3]]
1417;
1418  %res = atomicrmw udec_wrap ptr addrspace(3) %ptr, i8 %value seq_cst, align 2
1419  ret i8 %res
1420}
1421
; i8 udec_wrap on an addrspace(3) pointer with an explicit "align 4".
; The expected output operates on the original pointer with no ptrmask or
; shift; the byte is the low 8 bits of the word and is merged back with the
; constant mask -256. Both RUN lines expect the same output.
1422define i8 @test_atomicrmw_dec_i8_local_align4(ptr addrspace(3) %ptr, i8 %value) {
1423; CHECK-LABEL: @test_atomicrmw_dec_i8_local_align4(
1424; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(3) [[PTR:%.*]], align 4
1425; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1426; CHECK:       atomicrmw.start:
1427; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1428; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8
1429; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 [[EXTRACTED]], 1
1430; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1431; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1432; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
1433; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 [[VALUE]], i8 [[TMP2]]
1434; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1435; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -256
1436; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
1437; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(3) [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] seq_cst seq_cst, align 4
1438; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1439; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1440; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1441; CHECK:       atomicrmw.end:
1442; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8
1443; CHECK-NEXT:    ret i8 [[EXTRACTED1]]
1444;
1445  %res = atomicrmw udec_wrap ptr addrspace(3) %ptr, i8 %value seq_cst, align 4
1446  ret i8 %res
1447}
1448
; i8 `atomicrmw udec_wrap` on an unqualified `ptr` with syncscope("agent") and
; no explicit alignment. Both prefixes expect a cmpxchg loop on the enclosing
; i32: the pointer is masked with -4 via llvm.ptrmask, the byte's shift amount
; is derived from the low two pointer bits, and the updated byte is merged back
; under MASK/INV_MASK. GCN computes the pointer bits in i64 and truncates the
; shift to i32; R600 computes them directly in i32.
1449define i8 @test_atomicrmw_dec_i8_flat_agent(ptr %ptr, i8 %value) {
1450; GCN-LABEL: @test_atomicrmw_dec_i8_flat_agent(
1451; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
1452; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
1453; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
1454; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
1455; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
1456; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
1457; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1458; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
1459; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
1460; GCN:       atomicrmw.start:
1461; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1462; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
1463; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1464; GCN-NEXT:    [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1
1465; GCN-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1466; GCN-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1467; GCN-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
1468; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]]
1469; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1470; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
1471; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1472; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1473; GCN-NEXT:    [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1474; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1475; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1476; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1477; GCN:       atomicrmw.end:
1478; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
1479; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1480; GCN-NEXT:    ret i8 [[EXTRACTED3]]
1481;
1482; R600-LABEL: @test_atomicrmw_dec_i8_flat_agent(
1483; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[PTR:%.*]], i32 -4)
1484; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i32
1485; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1486; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1487; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
1488; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1489; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
1490; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
1491; R600:       atomicrmw.start:
1492; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1493; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
1494; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1495; R600-NEXT:    [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1
1496; R600-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1497; R600-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1498; R600-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
1499; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]]
1500; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1501; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
1502; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1503; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1504; R600-NEXT:    [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1505; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1506; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1507; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1508; R600:       atomicrmw.end:
1509; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1510; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1511; R600-NEXT:    ret i8 [[EXTRACTED3]]
1512;
1513  %res = atomicrmw udec_wrap ptr %ptr, i8 %value syncscope("agent") seq_cst
1514  ret i8 %res
1515}
1516
; i8 `atomicrmw udec_wrap` on an unqualified `ptr` with syncscope("agent") and
; an explicit `align 2`. The checks still expect the pointer to be masked with
; -4 through llvm.ptrmask and a runtime shift amount taken from the low two
; pointer bits, followed by an i32 cmpxchg loop that merges the new byte under
; MASK/INV_MASK. GCN and R600 differ only in the integer width used for the
; pointer arithmetic (i64 with a trunc vs. i32).
1517define i8 @test_atomicrmw_dec_i8_flat_agent_align2(ptr %ptr, i8 %value) {
1518; GCN-LABEL: @test_atomicrmw_dec_i8_flat_agent_align2(
1519; GCN-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
1520; GCN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i64
1521; GCN-NEXT:    [[PTRLSB:%.*]] = and i64 [[TMP1]], 3
1522; GCN-NEXT:    [[TMP2:%.*]] = shl i64 [[PTRLSB]], 3
1523; GCN-NEXT:    [[SHIFTAMT:%.*]] = trunc i64 [[TMP2]] to i32
1524; GCN-NEXT:    [[MASK:%.*]] = shl i32 255, [[SHIFTAMT]]
1525; GCN-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1526; GCN-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
1527; GCN-NEXT:    br label [[ATOMICRMW_START:%.*]]
1528; GCN:       atomicrmw.start:
1529; GCN-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1530; GCN-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[SHIFTAMT]]
1531; GCN-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1532; GCN-NEXT:    [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1
1533; GCN-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1534; GCN-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1535; GCN-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
1536; GCN-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]]
1537; GCN-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1538; GCN-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[SHIFTAMT]]
1539; GCN-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1540; GCN-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1541; GCN-NEXT:    [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1542; GCN-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1543; GCN-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1544; GCN-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1545; GCN:       atomicrmw.end:
1546; GCN-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[SHIFTAMT]]
1547; GCN-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1548; GCN-NEXT:    ret i8 [[EXTRACTED3]]
1549;
1550; R600-LABEL: @test_atomicrmw_dec_i8_flat_agent_align2(
1551; R600-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[PTR:%.*]], i32 -4)
1552; R600-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[PTR]] to i32
1553; R600-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1554; R600-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1555; R600-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
1556; R600-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1557; R600-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ALIGNEDADDR]], align 4
1558; R600-NEXT:    br label [[ATOMICRMW_START:%.*]]
1559; R600:       atomicrmw.start:
1560; R600-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP3]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1561; R600-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[LOADED]], [[TMP2]]
1562; R600-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1563; R600-NEXT:    [[TMP4:%.*]] = sub i8 [[EXTRACTED]], 1
1564; R600-NEXT:    [[TMP5:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1565; R600-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1566; R600-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
1567; R600-NEXT:    [[NEW:%.*]] = select i1 [[TMP7]], i8 [[VALUE]], i8 [[TMP4]]
1568; R600-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1569; R600-NEXT:    [[SHIFTED1:%.*]] = shl nuw i32 [[EXTENDED]], [[TMP2]]
1570; R600-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1571; R600-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[SHIFTED1]]
1572; R600-NEXT:    [[TMP8:%.*]] = cmpxchg ptr [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1573; R600-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1574; R600-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1575; R600-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1576; R600:       atomicrmw.end:
1577; R600-NEXT:    [[SHIFTED2:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1578; R600-NEXT:    [[EXTRACTED3:%.*]] = trunc i32 [[SHIFTED2]] to i8
1579; R600-NEXT:    ret i8 [[EXTRACTED3]]
1580;
1581  %res = atomicrmw udec_wrap ptr %ptr, i8 %value syncscope("agent") seq_cst, align 2
1582  ret i8 %res
1583}
1584
; i8 `atomicrmw udec_wrap` on an unqualified `ptr` with syncscope("agent") and
; an explicit `align 4`. The shared CHECK prefix (both RUN lines) expects no
; llvm.ptrmask call and no shift: the i32 is loaded straight from %ptr, the low
; byte is extracted with a trunc, and the new byte is merged in with the
; constant mask -256 before the i32 cmpxchg.
1585define i8 @test_atomicrmw_dec_i8_flat_agent_align4(ptr %ptr, i8 %value) {
1586; CHECK-LABEL: @test_atomicrmw_dec_i8_flat_agent_align4(
1587; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[PTR:%.*]], align 4
1588; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1589; CHECK:       atomicrmw.start:
1590; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP1]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1591; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[LOADED]] to i8
1592; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 [[EXTRACTED]], 1
1593; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i8 [[EXTRACTED]], 0
1594; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[EXTRACTED]], [[VALUE:%.*]]
1595; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
1596; CHECK-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i8 [[VALUE]], i8 [[TMP2]]
1597; CHECK-NEXT:    [[EXTENDED:%.*]] = zext i8 [[NEW]] to i32
1598; CHECK-NEXT:    [[UNMASKED:%.*]] = and i32 [[LOADED]], -256
1599; CHECK-NEXT:    [[INSERTED:%.*]] = or i32 [[UNMASKED]], [[EXTENDED]]
1600; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr [[PTR]], i32 [[LOADED]], i32 [[INSERTED]] syncscope("agent") seq_cst seq_cst, align 4
1601; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1602; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1603; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1604; CHECK:       atomicrmw.end:
1605; CHECK-NEXT:    [[EXTRACTED1:%.*]] = trunc i32 [[NEWLOADED]] to i8
1606; CHECK-NEXT:    ret i8 [[EXTRACTED1]]
1607;
1608  %res = atomicrmw udec_wrap ptr %ptr, i8 %value syncscope("agent") seq_cst, align 4
1609  ret i8 %res
1610}
1611
; i8 `atomicrmw xchg` on a `ptr addrspace(7)` pointer with syncscope("agent")
; and no explicit alignment. The shared CHECK prefix expects the pointer to be
; masked with -4 via llvm.ptrmask.p7.i32, the operand to be zero-extended and
; pre-shifted into its byte lane outside the loop, and an i32 cmpxchg loop that
; replaces the lane under INV_MASK. The pointer arithmetic is done in i32 for
; both RUN lines.
1612define i8 @test_atomicrmw_xchg_i8_buffer_fat_agent(ptr addrspace(7) %ptr, i8 %value) {
1613; CHECK-LABEL: @test_atomicrmw_xchg_i8_buffer_fat_agent(
1614; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) [[PTR:%.*]], i32 -4)
1615; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(7) [[PTR]] to i32
1616; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1617; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1618; CHECK-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
1619; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1620; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
1621; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
1622; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(7) [[ALIGNEDADDR]], align 4
1623; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1624; CHECK:       atomicrmw.start:
1625; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1626; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1627; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[VALOPERAND_SHIFTED]]
1628; CHECK-NEXT:    [[TMP7:%.*]] = cmpxchg ptr addrspace(7) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP6]] syncscope("agent") seq_cst seq_cst, align 4
1629; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP7]], 1
1630; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP7]], 0
1631; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1632; CHECK:       atomicrmw.end:
1633; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1634; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1635; CHECK-NEXT:    ret i8 [[EXTRACTED]]
1636;
1637  %res = atomicrmw xchg ptr addrspace(7) %ptr, i8 %value syncscope("agent") seq_cst
1638  ret i8 %res
1639}
1640
; i8 `atomicrmw xchg` on a `ptr addrspace(7)` pointer with syncscope("agent")
; and an explicit `align 4`. The shared CHECK prefix expects no llvm.ptrmask
; call and no shift: the operand is zero-extended once, the i32 is loaded
; directly from %ptr, and the low byte is replaced using the constant mask -256
; inside the i32 cmpxchg loop.
1641define i8 @test_atomicrmw_xchg_i8_buffer_fat_agent_align4(ptr addrspace(7) %ptr, i8 %value) {
1642; CHECK-LABEL: @test_atomicrmw_xchg_i8_buffer_fat_agent_align4(
1643; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[VALUE:%.*]] to i32
1644; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(7) [[PTR:%.*]], align 4
1645; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1646; CHECK:       atomicrmw.start:
1647; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1648; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[LOADED]], -256
1649; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP1]]
1650; CHECK-NEXT:    [[TMP5:%.*]] = cmpxchg ptr addrspace(7) [[PTR]], i32 [[LOADED]], i32 [[TMP4]] syncscope("agent") seq_cst seq_cst, align 4
1651; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
1652; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP5]], 0
1653; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1654; CHECK:       atomicrmw.end:
1655; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i8
1656; CHECK-NEXT:    ret i8 [[EXTRACTED]]
1657;
1658  %res = atomicrmw xchg ptr addrspace(7) %ptr, i8 %value syncscope("agent") seq_cst, align 4
1659  ret i8 %res
1660}
1661
; i8 `atomicrmw add` on a `ptr addrspace(7)` pointer with syncscope("agent")
; and no explicit alignment. The shared CHECK prefix expects the pointer to be
; masked with -4 via llvm.ptrmask.p7.i32 and the operand to be zero-extended
; and shifted into its byte lane. Inside the loop the add is done on the whole
; i32, the sum is masked with MASK, and it is combined with the untouched bytes
; (LOADED & INV_MASK) before the i32 cmpxchg.
1662define i8 @test_atomicrmw_add_i8_buffer_fat_agent(ptr addrspace(7) %ptr, i8 %value) {
1663; CHECK-LABEL: @test_atomicrmw_add_i8_buffer_fat_agent(
1664; CHECK-NEXT:    [[ALIGNEDADDR:%.*]] = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) [[PTR:%.*]], i32 -4)
1665; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr addrspace(7) [[PTR]] to i32
1666; CHECK-NEXT:    [[PTRLSB:%.*]] = and i32 [[TMP1]], 3
1667; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[PTRLSB]], 3
1668; CHECK-NEXT:    [[MASK:%.*]] = shl i32 255, [[TMP2]]
1669; CHECK-NEXT:    [[INV_MASK:%.*]] = xor i32 [[MASK]], -1
1670; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[VALUE:%.*]] to i32
1671; CHECK-NEXT:    [[VALOPERAND_SHIFTED:%.*]] = shl i32 [[TMP3]], [[TMP2]]
1672; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr addrspace(7) [[ALIGNEDADDR]], align 4
1673; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1674; CHECK:       atomicrmw.start:
1675; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP4]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1676; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[VALOPERAND_SHIFTED]]
1677; CHECK-NEXT:    [[TMP5:%.*]] = and i32 [[NEW]], [[MASK]]
1678; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[LOADED]], [[INV_MASK]]
1679; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
1680; CHECK-NEXT:    [[TMP8:%.*]] = cmpxchg ptr addrspace(7) [[ALIGNEDADDR]], i32 [[LOADED]], i32 [[TMP7]] syncscope("agent") seq_cst seq_cst, align 4
1681; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1
1682; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP8]], 0
1683; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1684; CHECK:       atomicrmw.end:
1685; CHECK-NEXT:    [[SHIFTED:%.*]] = lshr i32 [[NEWLOADED]], [[TMP2]]
1686; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[SHIFTED]] to i8
1687; CHECK-NEXT:    ret i8 [[EXTRACTED]]
1688;
1689  %res = atomicrmw add ptr addrspace(7) %ptr, i8 %value syncscope("agent") seq_cst
1690  ret i8 %res
1691}
1692
; i8 `atomicrmw add` on a `ptr addrspace(7)` pointer with syncscope("agent")
; and an explicit `align 4`. The shared CHECK prefix expects no llvm.ptrmask
; call and no shift: the operand is zero-extended once, the add is done on the
; full i32 loaded directly from %ptr, and the result is merged using the
; constant masks 255 (new byte) and -256 (preserved bytes) before the i32
; cmpxchg.
1693define i8 @test_atomicrmw_add_i8_buffer_fat_agent_align4(ptr addrspace(7) %ptr, i8 %value) {
1694; CHECK-LABEL: @test_atomicrmw_add_i8_buffer_fat_agent_align4(
1695; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[VALUE:%.*]] to i32
1696; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspace(7) [[PTR:%.*]], align 4
1697; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
1698; CHECK:       atomicrmw.start:
1699; CHECK-NEXT:    [[LOADED:%.*]] = phi i32 [ [[TMP2]], [[TMP0:%.*]] ], [ [[NEWLOADED:%.*]], [[ATOMICRMW_START]] ]
1700; CHECK-NEXT:    [[NEW:%.*]] = add i32 [[LOADED]], [[TMP1]]
1701; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[NEW]], 255
1702; CHECK-NEXT:    [[TMP4:%.*]] = and i32 [[LOADED]], -256
1703; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
1704; CHECK-NEXT:    [[TMP6:%.*]] = cmpxchg ptr addrspace(7) [[PTR]], i32 [[LOADED]], i32 [[TMP5]] syncscope("agent") seq_cst seq_cst, align 4
1705; CHECK-NEXT:    [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP6]], 1
1706; CHECK-NEXT:    [[NEWLOADED]] = extractvalue { i32, i1 } [[TMP6]], 0
1707; CHECK-NEXT:    br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]]
1708; CHECK:       atomicrmw.end:
1709; CHECK-NEXT:    [[EXTRACTED:%.*]] = trunc i32 [[NEWLOADED]] to i8
1710; CHECK-NEXT:    ret i8 [[EXTRACTED]]
1711;
1712  %res = atomicrmw add ptr addrspace(7) %ptr, i8 %value syncscope("agent") seq_cst, align 4
1713  ret i8 %res
1714}
1715