xref: /llvm-project/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll (revision f412b78ffc2b88b614a10310ca9ba473f1f0f9b9)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
3
4;
5; EXTRQ
6;
7
; Baseline: extrq with two non-constant operands. The expected output keeps
; the intrinsic call unchanged.
8define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
9; CHECK-LABEL: @test_extrq_call(
10; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #[[ATTR1:[0-9]+]]
11; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
12;
13  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
14  ret <2 x i64> %1
15}
16
; extrq of an all-zero first operand with a non-constant control vector.
; The expected output is the constant <i64 0, i64 undef>.
17define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
18; CHECK-LABEL: @test_extrq_zero_arg0(
19; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
20;
21  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
22  ret <2 x i64> %1
23}
24
; extrq with an all-zero control vector. The expected output replaces the
; call with a byte shuffle that keeps bytes 0-7 of %x and leaves the upper
; eight lanes poison.
25define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
26; CHECK-LABEL: @test_extrq_zero_arg1(
27; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
28; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
29; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
30; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
31;
32  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
33  ret <2 x i64> %1
34}
35
; extrq with a constant control vector whose first two bytes are 8 and 15.
; The expected output is the immediate form, extrqi, with those two bytes
; as its i8 operands (8, then 15).
36define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
37; CHECK-LABEL: @test_extrq_to_extqi(
38; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 8, i8 15)
39; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
40;
41  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
42  ret <2 x i64> %1
43}
44
; extrq with both operands constant: source <i64 -1, i64 55>, control bytes
; 8 and 15. The expected output is the folded constant <i64 255, i64 undef>;
; the i64 55 in the upper source element does not show up in the result.
45define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
46; CHECK-LABEL: @test_extrq_constant(
47; CHECK-NEXT:    ret <2 x i64> <i64 255, i64 undef>
48;
49  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
50  ret <2 x i64> %1
51}
52
; Same shape as @test_extrq_constant, but the upper source element is undef
; and the control bytes are 16 and 15. The expected output is the folded
; constant <i64 65535, i64 undef>.
53define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
54; CHECK-LABEL: @test_extrq_constant_undef(
55; CHECK-NEXT:    ret <2 x i64> <i64 65535, i64 undef>
56;
57  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
58  ret <2 x i64> %1
59}
60
; extrq whose control vector is written as a bitcast constant expression of
; <i64 0, i64 undef>. The expected output is the same low-eight-byte shuffle
; of %x that @test_extrq_zero_arg1 expects for an all-zero control vector.
61define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) {
62; CHECK-LABEL: @test_extrq_call_constexpr(
63; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
64; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
65; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
66; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
67;
68  %1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> <i64 0, i64 undef> to <16 x i8>))
69  ret <2 x i64> %1
70}
71
72;
73; EXTRQI
74;
75
; Baseline: extrqi on a non-constant input with immediates 8 and 23. The
; expected output keeps the intrinsic call unchanged.
76define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
77; CHECK-LABEL: @test_extrqi_call(
78; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 8, i8 23)
79; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
80;
81  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
82  ret <2 x i64> %1
83}
84
; extrqi with immediates 32 and 32. The expected output is a byte shuffle
; producing bytes 4-7 of %x followed by four zero bytes, with the upper
; eight lanes poison.
85define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
86; CHECK-LABEL: @test_extrqi_shuffle_1zuu(
87; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
88; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 0, i8 0, i8 0, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
89; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
90; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
91;
92  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
93  ret <2 x i64> %1
94}
95
; extrqi with immediates 8 and 16. The expected output is a byte shuffle
; producing byte 2 of %x followed by seven zero bytes, with the upper eight
; lanes poison.
96define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
97; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(
98; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8>
99; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 poison, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
100; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
101; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
102;
103  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 8, i8 16)
104  ret <2 x i64> %1
105}
106
; extrqi with immediates 32 and 33 on a zero input. The expected output is
; undef. NOTE(review): presumably because 32 + 33 exceeds the 64-bit field;
; confirm against the SSE4a instruction reference.
107define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
108; CHECK-LABEL: @test_extrqi_undef(
109; CHECK-NEXT:    ret <2 x i64> undef
110;
111  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
112  ret <2 x i64> %1
113}
114
; extrqi of an all-zero input with immediates 3 and 18. The expected output
; is the folded constant <i64 0, i64 undef>.
115define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
116; CHECK-LABEL: @test_extrqi_zero(
117; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
118;
119  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
120  ret <2 x i64> %1
121}
122
; extrqi of the constant <i64 -1, i64 55> with immediates 3 and 18. The
; expected output is the folded constant <i64 7, i64 undef>.
123define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
124; CHECK-LABEL: @test_extrqi_constant(
125; CHECK-NEXT:    ret <2 x i64> <i64 7, i64 undef>
126;
127  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
128  ret <2 x i64> %1
129}
130
; extrqi of the constant <i64 -1, i64 undef> with immediates 4 and 18. The
; expected output is the folded constant <i64 15, i64 undef>.
131define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
132; CHECK-LABEL: @test_extrqi_constant_undef(
133; CHECK-NEXT:    ret <2 x i64> <i64 15, i64 undef>
134;
135  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
136  ret <2 x i64> %1
137}
138
; extrqi whose source is a nested constant expression (bitcast of a trunc of
; a bitcast of <i64 0, i64 undef, i64 2, i64 undef>), with immediates 8 and
; 16. The expected output is zeroinitializer.
139define <2 x i64> @test_extrqi_call_constexpr() {
140; CHECK-LABEL: @test_extrqi_call_constexpr(
141; CHECK-NEXT:    ret <2 x i64> zeroinitializer
142;
143  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 8, i8 16)
144  ret <2 x i64> %1
145}
146
147;
148; INSERTQ
149;
150
; Baseline: insertq with two non-constant operands. The expected output
; keeps the intrinsic call unchanged.
151define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
152; CHECK-LABEL: @test_insertq_call(
153; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) #[[ATTR1]]
154; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
155;
156  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
157  ret <2 x i64> %1
158}
159
; insertq with the constant second operand <i64 8, i64 658>. The expected
; output is the immediate form, insertqi, with value operand
; <i64 8, i64 poison> and immediates 18 and 2. (658 = 0x292: its low six
; bits are 18 and its second byte is 2.)
160define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
161; CHECK-LABEL: @test_insertq_to_insertqi(
162; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 8, i64 poison>, i8 18, i8 2)
163; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
164;
165  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
166  ret <2 x i64> %1
167}
168
; insertq with both operands constant: an all-zero destination and
; <i64 8, i64 658>. The expected output is the folded constant
; <i64 32, i64 undef>.
169define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
170; CHECK-LABEL: @test_insertq_constant(
171; CHECK-NEXT:    ret <2 x i64> <i64 32, i64 undef>
172;
173  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
174  ret <2 x i64> %1
175}
176
; Same as @test_insertq_constant, but the destination is <i64 1, i64 undef>.
; The expected output is the folded constant <i64 33, i64 undef>.
177define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
178; CHECK-LABEL: @test_insertq_constant_undef(
179; CHECK-NEXT:    ret <2 x i64> <i64 33, i64 undef>
180;
181  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
182  ret <2 x i64> %1
183}
184
; insertq whose second operand is a nested constant expression (bitcast of a
; trunc of a bitcast of <i64 0, i64 undef, i64 2, i64 undef>). The expected
; output is insertqi with value operand <i64 0, i64 poison> and immediates
; 2 and 0.
185define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) {
186; CHECK-LABEL: @test_insertq_call_constexpr(
187; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 0, i64 poison>, i8 2, i8 0)
188; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
189;
190  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>))
191  ret <2 x i64> %1
192}
193
194;
195; INSERTQI
196;
197
; insertqi with immediates 32 and 32, wrapped in bitcasts from and to
; <16 x i8>. The expected output is a single byte shuffle: bytes 0-3 of %v
; followed by bytes 0-3 of %i, upper eight lanes poison, with no bitcasts
; left.
198define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
199; CHECK-LABEL: @test_insertqi_shuffle_04uu(
200; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[V:%.*]], <16 x i8> [[I:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
201; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
202;
203  %1 = bitcast <16 x i8> %v to <2 x i64>
204  %2 = bitcast <16 x i8> %i to <2 x i64>
205  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
206  %4 = bitcast <2 x i64> %3 to <16 x i8>
207  ret <16 x i8> %4
208}
209
; insertqi with immediates 16 and 0, wrapped in bitcasts from and to
; <16 x i8>. The expected output is a single byte shuffle with %i as the
; first shuffle operand: bytes 0-1 of %i followed by bytes 2-7 of %v, upper
; eight lanes poison.
210define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
211; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
212; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[I:%.*]], <16 x i8> [[V:%.*]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
213; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
214;
215  %1 = bitcast <16 x i8> %v to <2 x i64>
216  %2 = bitcast <16 x i8> %i to <2 x i64>
217  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
218  %4 = bitcast <2 x i64> %3 to <16 x i8>
219  ret <16 x i8> %4
220}
221
; insertqi with every operand constant: destination <i64 -1, i64 -1>, value
; <i64 8, i64 0>, immediates 16 and 1. The expected output is the folded
; constant <i64 -131055, i64 undef> (low element 0xFFFFFFFFFFFE0011).
222define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
223; CHECK-LABEL: @test_insertqi_constant(
224; CHECK-NEXT:    ret <2 x i64> <i64 -131055, i64 undef>
225;
226  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
227  ret <2 x i64> %1
228}
229
; insertqi whose value operand is a nested constant expression, with
; immediates 48 and 3. The expected output keeps the call and its
; immediates, with the value operand simplified to <i64 0, i64 poison>.
230define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) {
231; CHECK-LABEL: @test_insertqi_call_constexpr(
232; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> <i64 0, i64 poison>, i8 48, i8 3)
233; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
234;
235  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
236  ret <2 x i64> %1
237}
238
239; The low 64 bits of the result are copied from the second arg, and the
240; top 64 bits of the result are undefined, so the expected output only
241; keeps bytes 0-7 of the second arg.
; insertqi with immediates 64 and 0. The expected output is a byte shuffle
; of %i only (bytes 0-7, upper eight lanes poison); %v is not referenced.
; This is the same output that @testZeroLength expects for immediates 0, 0.
242define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
243; CHECK-LABEL: @testInsert64Bits(
244; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8>
245; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
246; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
247; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
248;
249  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
250  ret <2 x i64> %1
251}
252
; insertqi with immediates 0 and 0. The expected output is a byte shuffle of
; %i only (bytes 0-7, upper eight lanes poison), identical to what
; @testInsert64Bits expects for immediates 64, 0.
253define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
254; CHECK-LABEL: @testZeroLength(
255; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8>
256; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
257; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
258; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
259;
260  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
261  ret <2 x i64> %1
262}
263
; insertqi with immediates 0 and 16. The expected output is undef.
; NOTE(review): presumably the field runs past bit 63 when the first
; immediate is 0; confirm against the SSE4a instruction reference.
264define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
265; CHECK-LABEL: @testUndefinedInsertq_1(
266; CHECK-NEXT:    ret <2 x i64> undef
267;
268  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
269  ret <2 x i64> %1
270}
271
; insertqi with immediates 48 and 32. The expected output is undef.
; NOTE(review): presumably because 48 + 32 exceeds 64; confirm against the
; SSE4a instruction reference.
272define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
273; CHECK-LABEL: @testUndefinedInsertq_2(
274; CHECK-NEXT:    ret <2 x i64> undef
275;
276  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
277  ret <2 x i64> %1
278}
279
; insertqi with immediates 64 and 16. The expected output is undef, the same
; result @testUndefinedInsertq_1 expects for immediates 0 and 16.
280define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
281; CHECK-LABEL: @testUndefinedInsertq_3(
282; CHECK-NEXT:    ret <2 x i64> undef
283;
284  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
285  ret <2 x i64> %1
286}
287
288;
289; Vector Demanded Bits
290;
291
; The first extrq operand is a shuffle that copies element 0 of %x into both
; lanes. The expected output drops the shuffle and passes %x directly.
292define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
293; CHECK-LABEL: @test_extrq_arg0(
294; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #[[ATTR1]]
295; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
296;
297  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
298  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
299  ret <2 x i64> %2
300}
301
; The extrq control operand is a shuffle of %y that keeps bytes 0-7 and
; fills bytes 8-15 with copies of byte 0. The expected output drops the
; shuffle and passes %y directly.
302define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
303; CHECK-LABEL: @test_extrq_arg1(
304; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #[[ATTR1]]
305; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
306;
307  %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
308  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
309  ret <2 x i64> %2
310}
311
; Combines @test_extrq_arg0 and @test_extrq_arg1: both extrq operands come
; from shuffles that only rearrange the upper half. The expected output
; drops both shuffles and passes %x and %y directly.
312define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
313; CHECK-LABEL: @test_extrq_args01(
314; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> [[X:%.*]], <16 x i8> [[Y:%.*]]) #[[ATTR1]]
315; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
316;
317  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
318  %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
319  %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
320  ret <2 x i64> %3
321}
322
; Only element 1 of the extrq result is used (it is copied into both lanes
; of the return value). The expected output is poison.
323define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
324; CHECK-LABEL: @test_extrq_ret(
325; CHECK-NEXT:    ret <2 x i64> poison
326;
327  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
328  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
329  ret <2 x i64> %2
330}
331
; The extrqi source is a shuffle that copies element 0 of %x into both
; lanes. The expected output drops the shuffle and passes %x directly,
; keeping immediates 3 and 2.
332define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
333; CHECK-LABEL: @test_extrqi_arg0(
334; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> [[X:%.*]], i8 3, i8 2)
335; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
336;
337  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
338  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
339  ret <2 x i64> %2
340}
341
; Only element 1 of the extrqi result is used (it is copied into both lanes
; of the return value). The expected output is poison.
342define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
343; CHECK-LABEL: @test_extrqi_ret(
344; CHECK-NEXT:    ret <2 x i64> poison
345;
346  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
347  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
348  ret <2 x i64> %2
349}
350
; The first insertq operand is a shuffle that copies element 0 of %x into
; both lanes. The expected output drops the shuffle and passes %x directly.
351define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
352; CHECK-LABEL: @test_insertq_arg0(
353; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]]) #[[ATTR1]]
354; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
355;
356  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
357  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
358  ret <2 x i64> %2
359}
360
; Only element 1 of the insertq result is used (it is copied into both lanes
; of the return value). The expected output is poison.
361define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
362; CHECK-LABEL: @test_insertq_ret(
363; CHECK-NEXT:    ret <2 x i64> poison
364;
365  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
366  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
367  ret <2 x i64> %2
368}
369
; The first insertqi operand is a shuffle that copies element 0 of %x into
; both lanes. The expected output drops the shuffle and passes %x directly,
; keeping immediates 3 and 2.
370define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
371; CHECK-LABEL: @test_insertqi_arg0(
372; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #[[ATTR1]]
373; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
374;
375  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
376  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
377  ret <2 x i64> %2
378}
379
; The second insertqi operand is a shuffle that copies element 0 of %y into
; both lanes. The expected output drops the shuffle and passes %y directly,
; keeping immediates 3 and 2.
380define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
381; CHECK-LABEL: @test_insertqi_arg1(
382; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #[[ATTR1]]
383; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
384;
385  %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
386  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
387  ret <2 x i64> %2
388}
389
; Combines @test_insertqi_arg0 and @test_insertqi_arg1: both vector operands
; are shuffles that copy element 0 into both lanes. The expected output
; drops both shuffles and passes %x and %y directly.
390define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
391; CHECK-LABEL: @test_insertqi_args01(
392; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], i8 3, i8 2) #[[ATTR1]]
393; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
394;
395  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
396  %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
397  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
398  ret <2 x i64> %3
399}
400
; Only element 1 of the insertqi result is used (it is copied into both
; lanes of the return value). The expected output is poison.
401define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
402; CHECK-LABEL: @test_insertqi_ret(
403; CHECK-NEXT:    ret <2 x i64> poison
404;
405  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
406  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
407  ret <2 x i64> %2
408}
409
; Declarations of the four SSE4a intrinsics called by the tests in this
; file. All four return <2 x i64> and are declared nounwind.

; extrq: <2 x i64> source plus a <16 x i8> control vector.
410declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
411
; extrqi: <2 x i64> source plus two i8 operands.
412declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
413
; insertq: two <2 x i64> operands.
414declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
415
; insertqi: two <2 x i64> operands plus two i8 operands.
416declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
417