xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/AMDGPU/min_max.ll (revision 5921295dcaa1ad514d79e0ee824b9df1c077a2d0)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
3; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
5
; Two-lane i16 unsigned-minimum input: lanes 0 and 1 of %arg0 and %arg1 are
; extracted, combined per lane with llvm.umin.i16, and reassembled into a
; <2 x i16> on top of an undef base vector.
; Per the checks below, the GFX7 and GFX8 runs keep the scalar
; extract/call/insert sequence (base vector printed as poison), while the
; GFX9 run expects a single <2 x i16> llvm.umin.v2i16 call.
; NOTE(review): the function is named uadd_sat and the results %add.N, yet the
; body calls llvm.umin; the names look inherited from another test - confirm.
6define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
7; GFX7-LABEL: @uadd_sat_v2i16(
8; GFX7-NEXT:  bb:
9; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
10; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
11; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
12; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
13; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
14; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
15; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
16; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
17; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
18;
19; GFX8-LABEL: @uadd_sat_v2i16(
20; GFX8-NEXT:  bb:
21; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
22; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
23; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
24; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
25; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
26; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
27; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
28; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
29; GFX8-NEXT:    ret <2 x i16> [[INS_1]]
30;
31; GFX9-LABEL: @uadd_sat_v2i16(
32; GFX9-NEXT:  bb:
33; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
34; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
35;
36bb:
37  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
38  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
39  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
40  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
41  %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
42  %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
43  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
44  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
45  ret <2 x i16> %ins.1
46}
47
; Two-lane i16 unsigned-maximum input: lanes 0 and 1 of %arg0 and %arg1 are
; extracted, combined per lane with llvm.umax.i16, and reassembled into a
; <2 x i16> on top of an undef base vector.
; Per the checks below, the GFX7 and GFX8 runs keep the scalar
; extract/call/insert sequence (base vector printed as poison), while the
; GFX9 run expects a single <2 x i16> llvm.umax.v2i16 call.
; NOTE(review): the function is named usub_sat and the results %add.N, yet the
; body calls llvm.umax; the names look inherited from another test - confirm.
48define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
49; GFX7-LABEL: @usub_sat_v2i16(
50; GFX7-NEXT:  bb:
51; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
52; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
53; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
54; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
55; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
56; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
57; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
58; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
59; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
60;
61; GFX8-LABEL: @usub_sat_v2i16(
62; GFX8-NEXT:  bb:
63; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
64; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
65; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
66; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
67; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
68; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
69; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
70; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
71; GFX8-NEXT:    ret <2 x i16> [[INS_1]]
72;
73; GFX9-LABEL: @usub_sat_v2i16(
74; GFX9-NEXT:  bb:
75; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
76; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
77;
78bb:
79  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
80  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
81  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
82  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
83  %add.0 = call i16 @llvm.umax.i16(i16 %arg0.0, i16 %arg1.0)
84  %add.1 = call i16 @llvm.umax.i16(i16 %arg0.1, i16 %arg1.1)
85  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
86  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
87  ret <2 x i16> %ins.1
88}
89
; Two-lane i16 signed-minimum input: lanes 0 and 1 of %arg0 and %arg1 are
; extracted, combined per lane with llvm.smin.i16, and reassembled into a
; <2 x i16> on top of an undef base vector.
; Per the checks below, the GFX7 and GFX8 runs keep the scalar
; extract/call/insert sequence (base vector printed as poison), while the
; GFX9 run expects a single <2 x i16> llvm.smin.v2i16 call.
; NOTE(review): the function is named sadd_sat and the results %add.N, yet the
; body calls llvm.smin; the names look inherited from another test - confirm.
90define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
91; GFX7-LABEL: @sadd_sat_v2i16(
92; GFX7-NEXT:  bb:
93; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
94; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
95; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
96; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
97; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
98; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
99; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
100; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
101; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
102;
103; GFX8-LABEL: @sadd_sat_v2i16(
104; GFX8-NEXT:  bb:
105; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
106; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
107; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
108; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
109; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
110; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.smin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
111; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
112; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
113; GFX8-NEXT:    ret <2 x i16> [[INS_1]]
114;
115; GFX9-LABEL: @sadd_sat_v2i16(
116; GFX9-NEXT:  bb:
117; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
118; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
119;
120bb:
121  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
122  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
123  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
124  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
125  %add.0 = call i16 @llvm.smin.i16(i16 %arg0.0, i16 %arg1.0)
126  %add.1 = call i16 @llvm.smin.i16(i16 %arg0.1, i16 %arg1.1)
127  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
128  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
129  ret <2 x i16> %ins.1
130}
131
; Two-lane i16 signed-maximum input: lanes 0 and 1 of %arg0 and %arg1 are
; extracted, combined per lane with llvm.smax.i16, and reassembled into a
; <2 x i16> on top of an undef base vector.
; Per the checks below, the GFX7 and GFX8 runs keep the scalar
; extract/call/insert sequence (base vector printed as poison), while the
; GFX9 run expects a single <2 x i16> llvm.smax.v2i16 call.
; NOTE(review): the function is named ssub_sat and the results %add.N, yet the
; body calls llvm.smax; the names look inherited from another test - confirm.
132define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
133; GFX7-LABEL: @ssub_sat_v2i16(
134; GFX7-NEXT:  bb:
135; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
136; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
137; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
138; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
139; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
140; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
141; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
142; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
143; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
144;
145; GFX8-LABEL: @ssub_sat_v2i16(
146; GFX8-NEXT:  bb:
147; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
148; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
149; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
150; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
151; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
152; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.smax.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
153; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
154; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
155; GFX8-NEXT:    ret <2 x i16> [[INS_1]]
156;
157; GFX9-LABEL: @ssub_sat_v2i16(
158; GFX9-NEXT:  bb:
159; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
160; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
161;
162bb:
163  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
164  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
165  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
166  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
167  %add.0 = call i16 @llvm.smax.i16(i16 %arg0.0, i16 %arg1.0)
168  %add.1 = call i16 @llvm.smax.i16(i16 %arg0.1, i16 %arg1.1)
169  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
170  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
171  ret <2 x i16> %ins.1
172}
173
; Two-lane i32 unsigned-minimum input: lanes 0 and 1 of %arg0 and %arg1 are
; extracted, combined per lane with llvm.umin.i32, and reassembled into a
; <2 x i32> on top of an undef base vector.
; The checks use the GCN prefix shared by all three RUN lines and expect the
; scalar extract/call/insert sequence to remain, with the base vector printed
; as poison.
; NOTE(review): the function is named uadd_sat and the results %add.N, yet the
; body calls llvm.umin; the names look inherited from another test - confirm.
174define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
175; GCN-LABEL: @uadd_sat_v2i32(
176; GCN-NEXT:  bb:
177; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
178; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
179; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
180; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
181; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.umin.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
182; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.umin.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
183; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
184; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
185; GCN-NEXT:    ret <2 x i32> [[INS_1]]
186;
187bb:
188  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
189  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
190  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
191  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
192  %add.0 = call i32 @llvm.umin.i32(i32 %arg0.0, i32 %arg1.0)
193  %add.1 = call i32 @llvm.umin.i32(i32 %arg0.1, i32 %arg1.1)
194  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
195  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
196  ret <2 x i32> %ins.1
197}
198
; Two-lane i32 unsigned-maximum input: lanes 0 and 1 of %arg0 and %arg1 are
; extracted, combined per lane with llvm.umax.i32, and reassembled into a
; <2 x i32> on top of an undef base vector.
; The checks use the GCN prefix shared by all three RUN lines and expect the
; scalar extract/call/insert sequence to remain, with the base vector printed
; as poison.
; NOTE(review): the function is named usub_sat and the results %add.N, yet the
; body calls llvm.umax; the names look inherited from another test - confirm.
199define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
200; GCN-LABEL: @usub_sat_v2i32(
201; GCN-NEXT:  bb:
202; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
203; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
204; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
205; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
206; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.umax.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
207; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.umax.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
208; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
209; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
210; GCN-NEXT:    ret <2 x i32> [[INS_1]]
211;
212bb:
213  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
214  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
215  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
216  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
217  %add.0 = call i32 @llvm.umax.i32(i32 %arg0.0, i32 %arg1.0)
218  %add.1 = call i32 @llvm.umax.i32(i32 %arg0.1, i32 %arg1.1)
219  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
220  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
221  ret <2 x i32> %ins.1
222}
223
; Two-lane i32 signed-minimum input: lanes 0 and 1 of %arg0 and %arg1 are
; extracted, combined per lane with llvm.smin.i32, and reassembled into a
; <2 x i32> on top of an undef base vector.
; The checks use the GCN prefix shared by all three RUN lines and expect the
; scalar extract/call/insert sequence to remain, with the base vector printed
; as poison.
; NOTE(review): the function is named sadd_sat and the results %add.N, yet the
; body calls llvm.smin; the names look inherited from another test - confirm.
224define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
225; GCN-LABEL: @sadd_sat_v2i32(
226; GCN-NEXT:  bb:
227; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
228; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
229; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
230; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
231; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.smin.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
232; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.smin.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
233; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
234; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
235; GCN-NEXT:    ret <2 x i32> [[INS_1]]
236;
237bb:
238  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
239  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
240  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
241  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
242  %add.0 = call i32 @llvm.smin.i32(i32 %arg0.0, i32 %arg1.0)
243  %add.1 = call i32 @llvm.smin.i32(i32 %arg0.1, i32 %arg1.1)
244  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
245  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
246  ret <2 x i32> %ins.1
247}
248
; Two-lane i32 signed-maximum input: lanes 0 and 1 of %arg0 and %arg1 are
; extracted, combined per lane with llvm.smax.i32, and reassembled into a
; <2 x i32> on top of an undef base vector.
; The checks use the GCN prefix shared by all three RUN lines and expect the
; scalar extract/call/insert sequence to remain, with the base vector printed
; as poison.
; NOTE(review): the function is named ssub_sat and the results %add.N, yet the
; body calls llvm.smax; the names look inherited from another test - confirm.
249define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
250; GCN-LABEL: @ssub_sat_v2i32(
251; GCN-NEXT:  bb:
252; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
253; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
254; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
255; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
256; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.smax.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
257; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.smax.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
258; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
259; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
260; GCN-NEXT:    ret <2 x i32> [[INS_1]]
261;
262bb:
263  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
264  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
265  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
266  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
267  %add.0 = call i32 @llvm.smax.i32(i32 %arg0.0, i32 %arg1.0)
268  %add.1 = call i32 @llvm.smax.i32(i32 %arg0.1, i32 %arg1.1)
269  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
270  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
271  ret <2 x i32> %ins.1
272}
273
; Three-lane i16 unsigned-minimum input: lanes 0, 1 and 2 of %arg0 and %arg1
; are extracted, combined per lane with llvm.umin.i16, and reassembled into a
; <3 x i16> on top of an undef base vector.
; Per the checks below, the GFX7 and GFX8 runs keep the scalar
; extract/call/insert sequence (base vector printed as poison). The GFX9 run
; expects a vector llvm.umin call on the arguments whose result serves as the
; base vector, with lane 2 still computed by a scalar llvm.umin.i16 call and
; inserted at index 2.
; NOTE(review): the function is named uadd_sat and the results %add.N, yet the
; body calls llvm.umin; the names look inherited from another test - confirm.
274define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
275; GFX7-LABEL: @uadd_sat_v3i16(
276; GFX7-NEXT:  bb:
277; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
278; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
279; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
280; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
281; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
282; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
283; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
284; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
285; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
286; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
287; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
288; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
289; GFX7-NEXT:    ret <3 x i16> [[INS_2]]
290;
291; GFX8-LABEL: @uadd_sat_v3i16(
292; GFX8-NEXT:  bb:
293; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
294; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
295; GFX8-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
296; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
297; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
298; GFX8-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
299; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
300; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
301; GFX8-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
302; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
303; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
304; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
305; GFX8-NEXT:    ret <3 x i16> [[INS_2]]
306;
307; GFX9-LABEL: @uadd_sat_v3i16(
308; GFX9-NEXT:  bb:
309; GFX9-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
310; GFX9-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
311; GFX9-NEXT:    [[TMP0:%.*]] = call <3 x i16> @llvm.umin.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]])
312; GFX9-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
313; GFX9-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP0]], i16 [[ADD_2]], i64 2
314; GFX9-NEXT:    ret <3 x i16> [[INS_2]]
315;
316bb:
317  %arg0.0 = extractelement <3 x i16> %arg0, i64 0
318  %arg0.1 = extractelement <3 x i16> %arg0, i64 1
319  %arg0.2 = extractelement <3 x i16> %arg0, i64 2
320  %arg1.0 = extractelement <3 x i16> %arg1, i64 0
321  %arg1.1 = extractelement <3 x i16> %arg1, i64 1
322  %arg1.2 = extractelement <3 x i16> %arg1, i64 2
323  %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
324  %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
325  %add.2 = call i16 @llvm.umin.i16(i16 %arg0.2, i16 %arg1.2)
326  %ins.0 = insertelement <3 x i16> undef, i16 %add.0, i64 0
327  %ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
328  %ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2
329  ret <3 x i16> %ins.2
330}
331
; Four-lane i16 unsigned-minimum input: lanes 0 through 3 of %arg0 and %arg1
; are extracted, combined per lane with llvm.umin.i16, and reassembled into a
; <4 x i16> on top of an undef base vector.
; Per the checks below, the GFX7 run keeps the scalar extract/call/insert
; sequence (base vector printed as poison). The GFX8 run keeps lanes 0 and 1
; scalar and takes lanes 2 and 3 from a vector llvm.umin call through
; shufflevector instructions. The GFX9 run expects two vector llvm.umin calls
; whose results are merged by shufflevector (result lanes 0-1 from the first
; call, result lanes 2-3 from the second).
; NOTE(review): the function is named uadd_sat and the results %add.N, yet the
; body calls llvm.umin; the names look inherited from another test - confirm.
332define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
333; GFX7-LABEL: @uadd_sat_v4i16(
334; GFX7-NEXT:  bb:
335; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
336; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
337; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
338; GFX7-NEXT:    [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
339; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
340; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
341; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
342; GFX7-NEXT:    [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
343; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
344; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
345; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
346; GFX7-NEXT:    [[ADD_3:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
347; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
348; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
349; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
350; GFX7-NEXT:    [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
351; GFX7-NEXT:    ret <4 x i16> [[INS_3]]
352;
353; GFX8-LABEL: @uadd_sat_v4i16(
354; GFX8-NEXT:  bb:
355; GFX8-NEXT:    [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
356; GFX8-NEXT:    [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
357; GFX8-NEXT:    [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
358; GFX8-NEXT:    [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
359; GFX8-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
360; GFX8-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.umin.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
361; GFX8-NEXT:    [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
362; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
363; GFX8-NEXT:    [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
364; GFX8-NEXT:    [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
365; GFX8-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
366; GFX8-NEXT:    [[INS_31:%.*]] = shufflevector <4 x i16> [[INS_1]], <4 x i16> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
367; GFX8-NEXT:    ret <4 x i16> [[INS_31]]
368;
369; GFX9-LABEL: @uadd_sat_v4i16(
370; GFX9-NEXT:  bb:
371; GFX9-NEXT:    [[TMP0:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG1:%.*]])
372; GFX9-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG1]])
373; GFX9-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
374; GFX9-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
375; GFX9-NEXT:    [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
376; GFX9-NEXT:    ret <4 x i16> [[INS_31]]
377;
378bb:
379  %arg0.0 = extractelement <4 x i16> %arg0, i64 0
380  %arg0.1 = extractelement <4 x i16> %arg0, i64 1
381  %arg0.2 = extractelement <4 x i16> %arg0, i64 2
382  %arg0.3 = extractelement <4 x i16> %arg0, i64 3
383  %arg1.0 = extractelement <4 x i16> %arg1, i64 0
384  %arg1.1 = extractelement <4 x i16> %arg1, i64 1
385  %arg1.2 = extractelement <4 x i16> %arg1, i64 2
386  %arg1.3 = extractelement <4 x i16> %arg1, i64 3
387  %add.0 = call i16 @llvm.umin.i16(i16 %arg0.0, i16 %arg1.0)
388  %add.1 = call i16 @llvm.umin.i16(i16 %arg0.1, i16 %arg1.1)
389  %add.2 = call i16 @llvm.umin.i16(i16 %arg0.2, i16 %arg1.2)
390  %add.3 = call i16 @llvm.umin.i16(i16 %arg0.3, i16 %arg1.3)
391  %ins.0 = insertelement <4 x i16> undef, i16 %add.0, i64 0
392  %ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
393  %ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
394  %ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3
395  ret <4 x i16> %ins.3
396}
397