xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll (revision ea43a30899df5c3c36412392c8f4db79973a1c43)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
3; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
4; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
5
; Unsigned saturating add on <2 x i16>, written as per-lane scalar code:
; both lanes of each argument are extracted, llvm.uadd.sat.i16 is applied to
; each lane pair, and the results are re-inserted into a <2 x i16>.
; Expected output per the CHECK lines: GFX7 (-mcpu=hawaii) keeps the two scalar
; calls; GFX8 (fiji) and GFX9 (gfx900) get a single llvm.uadd.sat.v2i16 call on
; the original arguments.
6define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
7; GFX7-LABEL: @uadd_sat_v2i16(
8; GFX7-NEXT:  bb:
9; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
10; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
11; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
12; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
13; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
14; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
15; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
16; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
17; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
18;
19; GFX8-LABEL: @uadd_sat_v2i16(
20; GFX8-NEXT:  bb:
21; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
22; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
23;
24; GFX9-LABEL: @uadd_sat_v2i16(
25; GFX9-NEXT:  bb:
26; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
27; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
28;
29bb:
30  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
31  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
32  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
33  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
34  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
35  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
36  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
37  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
38  ret <2 x i16> %ins.1
39}
40
; Unsigned saturating subtract on <2 x i16>, written as per-lane scalar
; llvm.usub.sat.i16 calls whose results are re-inserted into a <2 x i16>.
; Expected output per the CHECK lines: GFX7 keeps the two scalar calls; GFX8 and
; GFX9 get a single llvm.usub.sat.v2i16 call on the original arguments.
; Note: the result values are named %add.* (and captured as ADD_*) even though
; the operation here is a subtraction.
41define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
42; GFX7-LABEL: @usub_sat_v2i16(
43; GFX7-NEXT:  bb:
44; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
45; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
46; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
47; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
48; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
49; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
50; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
51; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
52; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
53;
54; GFX8-LABEL: @usub_sat_v2i16(
55; GFX8-NEXT:  bb:
56; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
57; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
58;
59; GFX9-LABEL: @usub_sat_v2i16(
60; GFX9-NEXT:  bb:
61; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
62; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
63;
64bb:
65  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
66  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
67  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
68  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
69  %add.0 = call i16 @llvm.usub.sat.i16(i16 %arg0.0, i16 %arg1.0)
70  %add.1 = call i16 @llvm.usub.sat.i16(i16 %arg0.1, i16 %arg1.1)
71  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
72  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
73  ret <2 x i16> %ins.1
74}
75
; Signed saturating add on <2 x i16>, written as per-lane scalar
; llvm.sadd.sat.i16 calls whose results are re-inserted into a <2 x i16>.
; Expected output per the CHECK lines: GFX7 keeps the two scalar calls; GFX8 and
; GFX9 get a single llvm.sadd.sat.v2i16 call on the original arguments.
76define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
77; GFX7-LABEL: @sadd_sat_v2i16(
78; GFX7-NEXT:  bb:
79; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
80; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
81; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
82; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
83; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
84; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
85; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
86; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
87; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
88;
89; GFX8-LABEL: @sadd_sat_v2i16(
90; GFX8-NEXT:  bb:
91; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
92; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
93;
94; GFX9-LABEL: @sadd_sat_v2i16(
95; GFX9-NEXT:  bb:
96; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
97; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
98;
99bb:
100  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
101  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
102  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
103  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
104  %add.0 = call i16 @llvm.sadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
105  %add.1 = call i16 @llvm.sadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
106  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
107  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
108  ret <2 x i16> %ins.1
109}
110
; Signed saturating subtract on <2 x i16>, written as per-lane scalar
; llvm.ssub.sat.i16 calls whose results are re-inserted into a <2 x i16>.
; Expected output per the CHECK lines: GFX7 keeps the two scalar calls; GFX8 and
; GFX9 get a single llvm.ssub.sat.v2i16 call on the original arguments.
; Note: the result values are named %add.* (and captured as ADD_*) even though
; the operation here is a subtraction.
111define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
112; GFX7-LABEL: @ssub_sat_v2i16(
113; GFX7-NEXT:  bb:
114; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
115; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
116; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
117; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
118; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
119; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
120; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> poison, i16 [[ADD_0]], i64 0
121; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
122; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
123;
124; GFX8-LABEL: @ssub_sat_v2i16(
125; GFX8-NEXT:  bb:
126; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
127; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
128;
129; GFX9-LABEL: @ssub_sat_v2i16(
130; GFX9-NEXT:  bb:
131; GFX9-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
132; GFX9-NEXT:    ret <2 x i16> [[TMP0]]
133;
134bb:
135  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
136  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
137  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
138  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
139  %add.0 = call i16 @llvm.ssub.sat.i16(i16 %arg0.0, i16 %arg1.0)
140  %add.1 = call i16 @llvm.ssub.sat.i16(i16 %arg0.1, i16 %arg1.1)
141  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
142  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
143  ret <2 x i16> %ins.1
144}
145
; Unsigned saturating add on <2 x i32>, written as per-lane scalar
; llvm.uadd.sat.i32 calls whose results are re-inserted into a <2 x i32>.
; All three RUN lines share the GCN prefix here: the expected output keeps the
; two scalar i32 calls on every tested target (no vector call is formed).
146define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
147; GCN-LABEL: @uadd_sat_v2i32(
148; GCN-NEXT:  bb:
149; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
150; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
151; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
152; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
153; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
154; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
155; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
156; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
157; GCN-NEXT:    ret <2 x i32> [[INS_1]]
158;
159bb:
160  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
161  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
162  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
163  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
164  %add.0 = call i32 @llvm.uadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
165  %add.1 = call i32 @llvm.uadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
166  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
167  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
168  ret <2 x i32> %ins.1
169}
170
; Unsigned saturating subtract on <2 x i32>, written as per-lane scalar
; llvm.usub.sat.i32 calls whose results are re-inserted into a <2 x i32>.
; All three RUN lines share the GCN prefix here: the expected output keeps the
; two scalar i32 calls on every tested target (no vector call is formed).
; Note: the result values are named %add.* even though this is a subtraction.
171define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
172; GCN-LABEL: @usub_sat_v2i32(
173; GCN-NEXT:  bb:
174; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
175; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
176; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
177; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
178; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
179; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
180; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
181; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
182; GCN-NEXT:    ret <2 x i32> [[INS_1]]
183;
184bb:
185  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
186  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
187  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
188  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
189  %add.0 = call i32 @llvm.usub.sat.i32(i32 %arg0.0, i32 %arg1.0)
190  %add.1 = call i32 @llvm.usub.sat.i32(i32 %arg0.1, i32 %arg1.1)
191  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
192  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
193  ret <2 x i32> %ins.1
194}
195
; Signed saturating add on <2 x i32>, written as per-lane scalar
; llvm.sadd.sat.i32 calls whose results are re-inserted into a <2 x i32>.
; All three RUN lines share the GCN prefix here: the expected output keeps the
; two scalar i32 calls on every tested target (no vector call is formed).
196define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
197; GCN-LABEL: @sadd_sat_v2i32(
198; GCN-NEXT:  bb:
199; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
200; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
201; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
202; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
203; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
204; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
205; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
206; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
207; GCN-NEXT:    ret <2 x i32> [[INS_1]]
208;
209bb:
210  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
211  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
212  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
213  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
214  %add.0 = call i32 @llvm.sadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
215  %add.1 = call i32 @llvm.sadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
216  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
217  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
218  ret <2 x i32> %ins.1
219}
220
; Signed saturating subtract on <2 x i32>, written as per-lane scalar
; llvm.ssub.sat.i32 calls whose results are re-inserted into a <2 x i32>.
; All three RUN lines share the GCN prefix here: the expected output keeps the
; two scalar i32 calls on every tested target (no vector call is formed).
; Note: the result values are named %add.* even though this is a subtraction.
221define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
222; GCN-LABEL: @ssub_sat_v2i32(
223; GCN-NEXT:  bb:
224; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
225; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
226; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
227; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
228; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
229; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
230; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
231; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
232; GCN-NEXT:    ret <2 x i32> [[INS_1]]
233;
234bb:
235  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
236  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
237  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
238  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
239  %add.0 = call i32 @llvm.ssub.sat.i32(i32 %arg0.0, i32 %arg1.0)
240  %add.1 = call i32 @llvm.ssub.sat.i32(i32 %arg0.1, i32 %arg1.1)
241  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
242  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
243  ret <2 x i32> %ins.1
244}
245
; Unsigned saturating add on an odd-width <3 x i16>, written as three per-lane
; scalar llvm.uadd.sat.i16 calls re-inserted into a <3 x i16>.
; Expected output per the CHECK lines:
;   GFX7      - all three scalar calls are kept.
;   GFX8/GFX9 - lanes 0 and 1 are pulled out with shufflevector and handled by
;               one llvm.uadd.sat.v2i16 call; lane 2 stays a scalar call whose
;               result is inserted into the widened vector at index 2.
246define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
247; GFX7-LABEL: @uadd_sat_v3i16(
248; GFX7-NEXT:  bb:
249; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
250; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
251; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
252; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
253; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
254; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
255; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
256; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
257; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
258; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <3 x i16> poison, i16 [[ADD_0]], i64 0
259; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
260; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
261; GFX7-NEXT:    ret <3 x i16> [[INS_2]]
262;
263; GFX8-LABEL: @uadd_sat_v3i16(
264; GFX8-NEXT:  bb:
265; GFX8-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
266; GFX8-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
267; GFX8-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
268; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
269; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
270; GFX8-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
271; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
272; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
273; GFX8-NEXT:    ret <3 x i16> [[INS_2]]
274;
275; GFX9-LABEL: @uadd_sat_v3i16(
276; GFX9-NEXT:  bb:
277; GFX9-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
278; GFX9-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
279; GFX9-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
280; GFX9-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
281; GFX9-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
282; GFX9-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
283; GFX9-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
284; GFX9-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
285; GFX9-NEXT:    ret <3 x i16> [[INS_2]]
286;
287bb:
288  %arg0.0 = extractelement <3 x i16> %arg0, i64 0
289  %arg0.1 = extractelement <3 x i16> %arg0, i64 1
290  %arg0.2 = extractelement <3 x i16> %arg0, i64 2
291  %arg1.0 = extractelement <3 x i16> %arg1, i64 0
292  %arg1.1 = extractelement <3 x i16> %arg1, i64 1
293  %arg1.2 = extractelement <3 x i16> %arg1, i64 2
294  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
295  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
296  %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
297  %ins.0 = insertelement <3 x i16> undef, i16 %add.0, i64 0
298  %ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
299  %ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2
300  ret <3 x i16> %ins.2
301}
302
; Unsigned saturating add on <4 x i16>, written as four per-lane scalar
; llvm.uadd.sat.i16 calls re-inserted into a <4 x i16>.
; Expected output per the CHECK lines:
;   GFX7      - all four scalar calls are kept.
;   GFX8/GFX9 - the work is split into two llvm.uadd.sat.v2i16 calls (lanes 0-1
;               and lanes 2-3, each selected with shufflevector), and the two
;               <2 x i16> results are concatenated by a final shufflevector.
303define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
304; GFX7-LABEL: @uadd_sat_v4i16(
305; GFX7-NEXT:  bb:
306; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
307; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
308; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
309; GFX7-NEXT:    [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
310; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
311; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
312; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
313; GFX7-NEXT:    [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
314; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
315; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
316; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
317; GFX7-NEXT:    [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
318; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <4 x i16> poison, i16 [[ADD_0]], i64 0
319; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
320; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
321; GFX7-NEXT:    [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
322; GFX7-NEXT:    ret <4 x i16> [[INS_3]]
323;
324; GFX8-LABEL: @uadd_sat_v4i16(
325; GFX8-NEXT:  bb:
326; GFX8-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
327; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
328; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
329; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
330; GFX8-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
331; GFX8-NEXT:    [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
332; GFX8-NEXT:    [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
333; GFX8-NEXT:    ret <4 x i16> [[INS_31]]
334;
335; GFX9-LABEL: @uadd_sat_v4i16(
336; GFX9-NEXT:  bb:
337; GFX9-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
338; GFX9-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
339; GFX9-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
340; GFX9-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
341; GFX9-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
342; GFX9-NEXT:    [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
343; GFX9-NEXT:    [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
344; GFX9-NEXT:    ret <4 x i16> [[INS_31]]
345;
346bb:
347  %arg0.0 = extractelement <4 x i16> %arg0, i64 0
348  %arg0.1 = extractelement <4 x i16> %arg0, i64 1
349  %arg0.2 = extractelement <4 x i16> %arg0, i64 2
350  %arg0.3 = extractelement <4 x i16> %arg0, i64 3
351  %arg1.0 = extractelement <4 x i16> %arg1, i64 0
352  %arg1.1 = extractelement <4 x i16> %arg1, i64 1
353  %arg1.2 = extractelement <4 x i16> %arg1, i64 2
354  %arg1.3 = extractelement <4 x i16> %arg1, i64 3
355  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
356  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
357  %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
358  %add.3 = call i16 @llvm.uadd.sat.i16(i16 %arg0.3, i16 %arg1.3)
359  %ins.0 = insertelement <4 x i16> undef, i16 %add.0, i64 0
360  %ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
361  %ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
362  %ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3
363  ret <4 x i16> %ins.3
364}
365
; Declarations of the scalar saturating add/sub intrinsics called by the test
; inputs in this file (i16 and i32 forms of uadd/usub/sadd/ssub). The vector
; forms (e.g. llvm.uadd.sat.v2i16) appear only in the expected CHECK output.
; Every declaration references attribute group #0, defined at the end.
366declare i16 @llvm.uadd.sat.i16(i16, i16) #0
367declare i16 @llvm.usub.sat.i16(i16, i16) #0
368declare i16 @llvm.sadd.sat.i16(i16, i16) #0
369declare i16 @llvm.ssub.sat.i16(i16, i16) #0
370
371declare i32 @llvm.uadd.sat.i32(i32, i32) #0
372declare i32 @llvm.usub.sat.i32(i32, i32) #0
373declare i32 @llvm.sadd.sat.i32(i32, i32) #0
374declare i32 @llvm.ssub.sat.i32(i32, i32) #0
375
376attributes #0 = { nounwind readnone speculatable willreturn }
377