xref: /llvm-project/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s
3
4target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-unknown-linux-gnu"
6
7define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) #0 {
8; CHECK-LABEL: @test_x86_sse2_cmp_pd(
9; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
10; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
11; CHECK-NEXT:    call void @llvm.donothing()
12; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
13; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
14; CHECK-NEXT:    [[TMP5:%.*]] = sext <2 x i1> [[TMP4]] to <2 x i64>
15; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7)
16; CHECK-NEXT:    store <2 x i64> [[TMP5]], ptr @__msan_retval_tls, align 8
17; CHECK-NEXT:    ret <2 x double> [[RES]]
18;
19  %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
20  ret <2 x double> %res
21}
22declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
23
24
25define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) #0 {
26; CHECK-LABEL: @test_x86_sse2_cmp_sd(
27; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
28; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
29; CHECK-NEXT:    call void @llvm.donothing()
30; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
31; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
32; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
33; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
34; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64>
35; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7)
36; CHECK-NEXT:    store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
37; CHECK-NEXT:    ret <2 x double> [[RES]]
38;
39  %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
40  ret <2 x double> %res
41}
42declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
43
44
45define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
46; CHECK-LABEL: @test_x86_sse2_comieq_sd(
47; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
48; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
49; CHECK-NEXT:    call void @llvm.donothing()
50; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
51; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
52; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
53; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
54; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
55; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
56; CHECK-NEXT:    ret i32 [[RES]]
57;
58  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
59  ret i32 %res
60}
61declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
62
63
64define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) #0 {
65; CHECK-LABEL: @test_x86_sse2_comige_sd(
66; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
67; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
68; CHECK-NEXT:    call void @llvm.donothing()
69; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
70; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
71; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
72; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
73; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
74; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
75; CHECK-NEXT:    ret i32 [[RES]]
76;
77  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
78  ret i32 %res
79}
80declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
81
82
83define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
84; CHECK-LABEL: @test_x86_sse2_comigt_sd(
85; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
86; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
87; CHECK-NEXT:    call void @llvm.donothing()
88; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
89; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
90; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
91; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
92; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
93; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
94; CHECK-NEXT:    ret i32 [[RES]]
95;
96  %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
97  ret i32 %res
98}
99declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
100
101
102define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) #0 {
103; CHECK-LABEL: @test_x86_sse2_comile_sd(
104; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
105; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
106; CHECK-NEXT:    call void @llvm.donothing()
107; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
108; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
109; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
110; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
111; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
112; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
113; CHECK-NEXT:    ret i32 [[RES]]
114;
115  %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
116  ret i32 %res
117}
118declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
119
120
121define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
122; CHECK-LABEL: @test_x86_sse2_comilt_sd(
123; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
124; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
125; CHECK-NEXT:    call void @llvm.donothing()
126; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
127; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
128; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
129; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
130; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
131; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
132; CHECK-NEXT:    ret i32 [[RES]]
133;
134  %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
135  ret i32 %res
136}
137declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
138
139
140define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
141; CHECK-LABEL: @test_x86_sse2_comineq_sd(
142; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
143; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
144; CHECK-NEXT:    call void @llvm.donothing()
145; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
146; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
147; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
148; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
149; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
150; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
151; CHECK-NEXT:    ret i32 [[RES]]
152;
153  %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
154  ret i32 %res
155}
156declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
157
158
159define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 {
160; CHECK-LABEL: @test_x86_sse2_cvtpd2dq(
161; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
162; CHECK-NEXT:    call void @llvm.donothing()
163; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
164; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
165; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
166; CHECK:       3:
167; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
168; CHECK-NEXT:    unreachable
169; CHECK:       4:
170; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
171; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
172; CHECK-NEXT:    ret <4 x i32> [[RES]]
173;
174  %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
175  ret <4 x i32> %res
176}
177declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
178
179
180define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 {
181; CHECK-LABEL: @test_mm_cvtpd_epi32_zext(
182; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
183; CHECK-NEXT:    call void @llvm.donothing()
184; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
185; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
186; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
187; CHECK:       3:
188; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
189; CHECK-NEXT:    unreachable
190; CHECK:       4:
191; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
192; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
193; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
194; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
195; CHECK-NEXT:    ret <2 x i64> [[BC]]
196;
197  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
198  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
199  %bc = bitcast <4 x i32> %res to <2 x i64>
200  ret <2 x i64> %bc
201}
202
203
204define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
205; CHECK-LABEL: @test_mm_cvtpd_epi32_zext_load(
206; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
207; CHECK-NEXT:    call void @llvm.donothing()
208; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
209; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
210; CHECK:       2:
211; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
212; CHECK-NEXT:    unreachable
213; CHECK:       3:
214; CHECK-NEXT:    [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
215; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
216; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
217; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
218; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
219; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
220; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
221; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
222; CHECK:       8:
223; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
224; CHECK-NEXT:    unreachable
225; CHECK:       9:
226; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]])
227; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
228; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
229; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
230; CHECK-NEXT:    ret <2 x i64> [[BC]]
231;
232  %a0 = load <2 x double>, ptr %p0
233  %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
234  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
235  %bc = bitcast <4 x i32> %res to <2 x i64>
236  ret <2 x i64> %bc
237}
238
239
240define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) #0 {
241; CHECK-LABEL: @test_x86_sse2_cvtpd2ps(
242; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
243; CHECK-NEXT:    call void @llvm.donothing()
244; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
245; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
246; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
247; CHECK:       3:
248; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
249; CHECK-NEXT:    unreachable
250; CHECK:       4:
251; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]])
252; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
253; CHECK-NEXT:    ret <4 x float> [[RES]]
254;
255  %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
256  ret <4 x float> %res
257}
258declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
259
260define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind #0 {
261; CHECK-LABEL: @test_x86_sse2_cvtpd2ps_zext(
262; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
263; CHECK-NEXT:    call void @llvm.donothing()
264; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
265; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
266; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
267; CHECK:       3:
268; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
269; CHECK-NEXT:    unreachable
270; CHECK:       4:
271; CHECK-NEXT:    [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]])
272; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
273; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
274; CHECK-NEXT:    ret <4 x float> [[RES]]
275;
276  %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
277  %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
278  ret <4 x float> %res
279}
280
281define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 {
282; CHECK-LABEL: @test_x86_sse2_cvtpd2ps_zext_load(
283; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
284; CHECK-NEXT:    call void @llvm.donothing()
285; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
286; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
287; CHECK:       2:
288; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
289; CHECK-NEXT:    unreachable
290; CHECK:       3:
291; CHECK-NEXT:    [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
292; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
293; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
294; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
295; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
296; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
297; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
298; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
299; CHECK:       8:
300; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
301; CHECK-NEXT:    unreachable
302; CHECK:       9:
303; CHECK-NEXT:    [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0]])
304; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
305; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
306; CHECK-NEXT:    ret <4 x float> [[RES]]
307;
308  %a0 = load <2 x double>, ptr %p0
309  %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
310  %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
311  ret <4 x float> %res
312}
313
314define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 {
315; CHECK-LABEL: @test_x86_sse2_cvtps2dq(
316; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
317; CHECK-NEXT:    call void @llvm.donothing()
318; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
319; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
320; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
321; CHECK:       3:
322; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
323; CHECK-NEXT:    unreachable
324; CHECK:       4:
325; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]])
326; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
327; CHECK-NEXT:    ret <4 x i32> [[RES]]
328;
329  %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
330  ret <4 x i32> %res
331}
332declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
333
334
335define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) #0 {
336; CHECK-LABEL: @test_x86_sse2_cvtsd2si(
337; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
338; CHECK-NEXT:    call void @llvm.donothing()
339; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
340; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
341; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
342; CHECK:       3:
343; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
344; CHECK-NEXT:    unreachable
345; CHECK:       4:
346; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[A0:%.*]])
347; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
348; CHECK-NEXT:    ret i32 [[RES]]
349;
350  %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
351  ret i32 %res
352}
353declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
354
355
356define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) #0 {
357; CHECK-LABEL: @test_x86_sse2_cvtsd2ss(
358; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
359; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
360; CHECK-NEXT:    call void @llvm.donothing()
361; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
362; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
363; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
364; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
365; CHECK:       5:
366; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
367; CHECK-NEXT:    unreachable
368; CHECK:       6:
369; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1:%.*]])
370; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
371; CHECK-NEXT:    ret <4 x float> [[RES]]
372;
373  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
374  ret <4 x float> %res
375}
376declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
377
378
379define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, ptr %p1) #0 {
380; CHECK-LABEL: @test_x86_sse2_cvtsd2ss_load(
381; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
382; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
383; CHECK-NEXT:    call void @llvm.donothing()
384; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
385; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
386; CHECK:       3:
387; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
388; CHECK-NEXT:    unreachable
389; CHECK:       4:
390; CHECK-NEXT:    [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16
391; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P1]] to i64
392; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
393; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
394; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
395; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0
396; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
397; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0
398; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
399; CHECK:       10:
400; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
401; CHECK-NEXT:    unreachable
402; CHECK:       11:
403; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]])
404; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
405; CHECK-NEXT:    ret <4 x float> [[RES]]
406;
407  %a1 = load <2 x double>, ptr %p1
408  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
409  ret <4 x float> %res
410}
411
412
413define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, ptr %p1) optsize #0 {
414; CHECK-LABEL: @test_x86_sse2_cvtsd2ss_load_optsize(
415; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
416; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
417; CHECK-NEXT:    call void @llvm.donothing()
418; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
419; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
420; CHECK:       3:
421; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
422; CHECK-NEXT:    unreachable
423; CHECK:       4:
424; CHECK-NEXT:    [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16
425; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P1]] to i64
426; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
427; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
428; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
429; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0
430; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
431; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0
432; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
433; CHECK:       10:
434; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
435; CHECK-NEXT:    unreachable
436; CHECK:       11:
437; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]])
438; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
439; CHECK-NEXT:    ret <4 x float> [[RES]]
440;
441  %a1 = load <2 x double>, ptr %p1
442  %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
443  ret <4 x float> %res
444}
445
446
447define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) #0 {
448; CHECK-LABEL: @test_x86_sse2_cvttpd2dq(
449; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
450; CHECK-NEXT:    call void @llvm.donothing()
451; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
452; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
453; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
454; CHECK:       3:
455; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
456; CHECK-NEXT:    unreachable
457; CHECK:       4:
458; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]])
459; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
460; CHECK-NEXT:    ret <4 x i32> [[RES]]
461;
462  %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
463  ret <4 x i32> %res
464}
465declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
466
467
468define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind #0 {
469; CHECK-LABEL: @test_mm_cvttpd_epi32_zext(
470; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
471; CHECK-NEXT:    call void @llvm.donothing()
472; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
473; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
474; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
475; CHECK:       3:
476; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
477; CHECK-NEXT:    unreachable
478; CHECK:       4:
479; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]])
480; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
481; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
482; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
483; CHECK-NEXT:    ret <2 x i64> [[BC]]
484;
485  %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
486  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
487  %bc = bitcast <4 x i32> %res to <2 x i64>
488  ret <2 x i64> %bc
489}
490
491
492define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 {
493; CHECK-LABEL: @test_mm_cvttpd_epi32_zext_load(
494; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
495; CHECK-NEXT:    call void @llvm.donothing()
496; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
497; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
498; CHECK:       2:
499; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
500; CHECK-NEXT:    unreachable
501; CHECK:       3:
502; CHECK-NEXT:    [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
503; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
504; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
505; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
506; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
507; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
508; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
509; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
510; CHECK:       8:
511; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
512; CHECK-NEXT:    unreachable
513; CHECK:       9:
514; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0]])
515; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
516; CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
517; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
518; CHECK-NEXT:    ret <2 x i64> [[BC]]
519;
520  %a0 = load <2 x double>, ptr %p0
521  %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
522  %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
523  %bc = bitcast <4 x i32> %res to <2 x i64>
524  ret <2 x i64> %bc
525}
526
527
528define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) #0 {
529; CHECK-LABEL: @test_x86_sse2_cvttps2dq(
530; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
531; CHECK-NEXT:    call void @llvm.donothing()
532; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
533; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
534; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
535; CHECK:       3:
536; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
537; CHECK-NEXT:    unreachable
538; CHECK:       4:
539; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> [[A0:%.*]])
540; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
541; CHECK-NEXT:    ret <4 x i32> [[RES]]
542;
543  %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
544  ret <4 x i32> %res
545}
546declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
547
548
549define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) #0 {
550; CHECK-LABEL: @test_x86_sse2_cvttsd2si(
551; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
552; CHECK-NEXT:    call void @llvm.donothing()
553; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
554; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
555; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
556; CHECK:       3:
557; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
558; CHECK-NEXT:    unreachable
559; CHECK:       4:
560; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[A0:%.*]])
561; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
562; CHECK-NEXT:    ret i32 [[RES]]
563;
564  %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
565  ret i32 %res
566}
567declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
568
569
570define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) #0 {
571; CHECK-LABEL: @test_x86_sse2_max_pd(
572; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
573; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
574; CHECK-NEXT:    call void @llvm.donothing()
575; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
576; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
577; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
578; CHECK-NEXT:    ret <2 x double> [[RES]]
579;
580  %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
581  ret <2 x double> %res
582}
583declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
584
585
586define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) #0 {
587; CHECK-LABEL: @test_x86_sse2_max_sd(
588; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
589; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
590; CHECK-NEXT:    call void @llvm.donothing()
591; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
592; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP3]], <2 x i32> <i32 2, i32 1>
593; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
594; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
595; CHECK-NEXT:    ret <2 x double> [[RES]]
596;
597  %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
598  ret <2 x double> %res
599}
600declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
601
602
603define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) #0 {
604; CHECK-LABEL: @test_x86_sse2_min_pd(
605; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
606; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
607; CHECK-NEXT:    call void @llvm.donothing()
608; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
609; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
610; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
611; CHECK-NEXT:    ret <2 x double> [[RES]]
612;
613  %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
614  ret <2 x double> %res
615}
616declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
617
618
619define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) #0 {
620; CHECK-LABEL: @test_x86_sse2_min_sd(
621; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
622; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
623; CHECK-NEXT:    call void @llvm.donothing()
624; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
625; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP3]], <2 x i32> <i32 2, i32 1>
626; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
627; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
628; CHECK-NEXT:    ret <2 x double> [[RES]]
629;
630  %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
631  ret <2 x double> %res
632}
633declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
634
635
636define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) #0 {
637; CHECK-LABEL: @test_x86_sse2_movmsk_pd(
638; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
639; CHECK-NEXT:    call void @llvm.donothing()
640; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
641; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
642; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
643; CHECK:       3:
644; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
645; CHECK-NEXT:    unreachable
646; CHECK:       4:
647; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> [[A0:%.*]])
648; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
649; CHECK-NEXT:    ret i32 [[RES]]
650;
651  %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
652  ret i32 %res
653}
654declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
655
656
657define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) #0 {
658; CHECK-LABEL: @test_x86_sse2_packssdw_128(
659; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
660; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
661; CHECK-NEXT:    call void @llvm.donothing()
662; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer
663; CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
664; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
665; CHECK-NEXT:    [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
666; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP4]], <4 x i32> [[TMP6]])
667; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
668; CHECK-NEXT:    store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
669; CHECK-NEXT:    ret <8 x i16> [[RES]]
670;
671  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
672  ret <8 x i16> %res
673}
674declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
675
676
677define <8 x i16> @test_x86_sse2_packssdw_128_fold() #0 {
678; CHECK-LABEL: @test_x86_sse2_packssdw_128_fold(
679; CHECK-NEXT:    call void @llvm.donothing()
680; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer)
681; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
682; CHECK-NEXT:    store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
683; CHECK-NEXT:    ret <8 x i16> [[RES]]
684;
685  %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
686  ret <8 x i16> %res
687}
688
689
690define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) #0 {
691; CHECK-LABEL: @test_x86_sse2_packsswb_128(
692; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
693; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
694; CHECK-NEXT:    call void @llvm.donothing()
695; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer
696; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
697; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
698; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
699; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP4]], <8 x i16> [[TMP6]])
700; CHECK-NEXT:    [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
701; CHECK-NEXT:    store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
702; CHECK-NEXT:    ret <16 x i8> [[RES]]
703;
704  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
705  ret <16 x i8> %res
706}
707declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
708
709
710define <16 x i8> @test_x86_sse2_packsswb_128_fold() #0 {
711; CHECK-LABEL: @test_x86_sse2_packsswb_128_fold(
712; CHECK-NEXT:    call void @llvm.donothing()
713; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer)
714; CHECK-NEXT:    [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
715; CHECK-NEXT:    store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
716; CHECK-NEXT:    ret <16 x i8> [[RES]]
717;
718  %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
719  ret <16 x i8> %res
720}
721
722
723define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) #0 {
724; CHECK-LABEL: @test_x86_sse2_packuswb_128(
725; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
726; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
727; CHECK-NEXT:    call void @llvm.donothing()
728; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer
729; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
730; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
731; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
732; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP4]], <8 x i16> [[TMP6]])
733; CHECK-NEXT:    [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
734; CHECK-NEXT:    store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
735; CHECK-NEXT:    ret <16 x i8> [[RES]]
736;
737  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
738  ret <16 x i8> %res
739}
740declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
741
742
743define <16 x i8> @test_x86_sse2_packuswb_128_fold() #0 {
744; CHECK-LABEL: @test_x86_sse2_packuswb_128_fold(
745; CHECK-NEXT:    call void @llvm.donothing()
746; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer)
747; CHECK-NEXT:    [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
748; CHECK-NEXT:    store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
749; CHECK-NEXT:    ret <16 x i8> [[RES]]
750;
751  %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
752  ret <16 x i8> %res
753}
754
755
756define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) #0 {
757; CHECK-LABEL: @test_x86_sse2_pavg_b(
758; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
759; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
760; CHECK-NEXT:    call void @llvm.donothing()
761; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]]
762; CHECK-NEXT:    [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]])
763; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
764; CHECK-NEXT:    ret <16 x i8> [[RES]]
765;
766  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
767  ret <16 x i8> %res
768}
769declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
770
771
772define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
773; CHECK-LABEL: @test_x86_sse2_pavg_w(
774; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
775; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
776; CHECK-NEXT:    call void @llvm.donothing()
777; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
778; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
779; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
780; CHECK-NEXT:    ret <8 x i16> [[RES]]
781;
782  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
783  ret <8 x i16> %res
784}
785declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
786
787
788define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 {
789; CHECK-LABEL: @test_x86_sse2_pmadd_wd(
790; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
791; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
792; CHECK-NEXT:    call void @llvm.donothing()
793; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
794; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
795; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
796; CHECK-NEXT:    [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
797; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
798; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
799; CHECK-NEXT:    ret <4 x i32> [[RES]]
800;
801  %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
802  ret <4 x i32> %res
803}
804declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
805
806
807define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) #0 {
808; CHECK-LABEL: @test_x86_sse2_pmovmskb_128(
809; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
810; CHECK-NEXT:    call void @llvm.donothing()
811; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
812; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
813; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
814; CHECK:       3:
815; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
816; CHECK-NEXT:    unreachable
817; CHECK:       4:
818; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> [[A0:%.*]])
819; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
820; CHECK-NEXT:    ret i32 [[RES]]
821;
822  %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
823  ret i32 %res
824}
825declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
826
827
828define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
829; CHECK-LABEL: @test_x86_sse2_pmulh_w(
830; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
831; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
832; CHECK-NEXT:    call void @llvm.donothing()
833; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
834; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
835; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
836; CHECK-NEXT:    ret <8 x i16> [[RES]]
837;
838  %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
839  ret <8 x i16> %res
840}
841declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
842
843
844define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
845; CHECK-LABEL: @test_x86_sse2_pmulhu_w(
846; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
847; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
848; CHECK-NEXT:    call void @llvm.donothing()
849; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
850; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
851; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
852; CHECK-NEXT:    ret <8 x i16> [[RES]]
853;
854  %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
855  ret <8 x i16> %res
856}
857declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
858
859
860define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) #0 {
861; CHECK-LABEL: @test_x86_sse2_psad_bw(
862; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
863; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
864; CHECK-NEXT:    call void @llvm.donothing()
865; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]]
866; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
867; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
868; CHECK-NEXT:    [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
869; CHECK-NEXT:    [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], splat (i64 48)
870; CHECK-NEXT:    [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]])
871; CHECK-NEXT:    store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
872; CHECK-NEXT:    ret <2 x i64> [[RES]]
873;
874  %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
875  ret <2 x i64> %res
876}
877declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
878
879
880define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
881; CHECK-LABEL: @test_x86_sse2_psll_d(
882; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
883; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
884; CHECK-NEXT:    call void @llvm.donothing()
885; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
886; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
887; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
888; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
889; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32>
890; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
891; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]]
892; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
893; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
894; CHECK-NEXT:    ret <4 x i32> [[RES]]
895;
896  %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
897  ret <4 x i32> %res
898}
899declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
900
901
902define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) #0 {
903; CHECK-LABEL: @test_x86_sse2_psll_q(
904; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
905; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
906; CHECK-NEXT:    call void @llvm.donothing()
907; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
908; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
909; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
910; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
911; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64>
912; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
913; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]]
914; CHECK-NEXT:    [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]])
915; CHECK-NEXT:    store <2 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
916; CHECK-NEXT:    ret <2 x i64> [[RES]]
917;
918  %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
919  ret <2 x i64> %res
920}
921declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
922
923
924define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
925; CHECK-LABEL: @test_x86_sse2_psll_w(
926; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
927; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
928; CHECK-NEXT:    call void @llvm.donothing()
929; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
930; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
931; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
932; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
933; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16>
934; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
935; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]]
936; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]])
937; CHECK-NEXT:    store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
938; CHECK-NEXT:    ret <8 x i16> [[RES]]
939;
940  %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
941  ret <8 x i16> %res
942}
943declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
944
945
946define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) #0 {
947; CHECK-LABEL: @test_x86_sse2_pslli_d(
948; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
949; CHECK-NEXT:    call void @llvm.donothing()
950; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> [[TMP1]], i32 7)
951; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
952; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> [[A0:%.*]], i32 7)
953; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
954; CHECK-NEXT:    ret <4 x i32> [[RES]]
955;
956  %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
957  ret <4 x i32> %res
958}
959declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
960
961
962define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) #0 {
963; CHECK-LABEL: @test_x86_sse2_pslli_q(
964; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
965; CHECK-NEXT:    call void @llvm.donothing()
966; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> [[TMP1]], i32 7)
967; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP2]], zeroinitializer
968; CHECK-NEXT:    [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> [[A0:%.*]], i32 7)
969; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
970; CHECK-NEXT:    ret <2 x i64> [[RES]]
971;
972  %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
973  ret <2 x i64> %res
974}
975declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
976
977
978define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) #0 {
979; CHECK-LABEL: @test_x86_sse2_pslli_w(
980; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
981; CHECK-NEXT:    call void @llvm.donothing()
982; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> [[TMP1]], i32 7)
983; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
984; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> [[A0:%.*]], i32 7)
985; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
986; CHECK-NEXT:    ret <8 x i16> [[RES]]
987;
988  %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
989  ret <8 x i16> %res
990}
991declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
992
993
994define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
995; CHECK-LABEL: @test_x86_sse2_psra_d(
996; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
997; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
998; CHECK-NEXT:    call void @llvm.donothing()
999; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1000; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
1001; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1002; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
1003; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32>
1004; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
1005; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]]
1006; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
1007; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
1008; CHECK-NEXT:    ret <4 x i32> [[RES]]
1009;
1010  %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1011  ret <4 x i32> %res
1012}
1013declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
1014
1015
1016define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
1017; CHECK-LABEL: @test_x86_sse2_psra_w(
1018; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
1019; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1020; CHECK-NEXT:    call void @llvm.donothing()
1021; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1022; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
1023; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1024; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
1025; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16>
1026; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
1027; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]]
1028; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]])
1029; CHECK-NEXT:    store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
1030; CHECK-NEXT:    ret <8 x i16> [[RES]]
1031;
1032  %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1033  ret <8 x i16> %res
1034}
1035declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
1036
1037
1038define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) #0 {
1039; CHECK-LABEL: @test_x86_sse2_psrai_d(
1040; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1041; CHECK-NEXT:    call void @llvm.donothing()
1042; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> [[TMP1]], i32 7)
1043; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
1044; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> [[A0:%.*]], i32 7)
1045; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
1046; CHECK-NEXT:    ret <4 x i32> [[RES]]
1047;
1048  %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
1049  ret <4 x i32> %res
1050}
1051declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
1052
1053
1054define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) #0 {
1055; CHECK-LABEL: @test_x86_sse2_psrai_w(
1056; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
1057; CHECK-NEXT:    call void @llvm.donothing()
1058; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> [[TMP1]], i32 7)
1059; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
1060; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> [[A0:%.*]], i32 7)
1061; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
1062; CHECK-NEXT:    ret <8 x i16> [[RES]]
1063;
1064  %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
1065  ret <8 x i16> %res
1066}
1067declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
1068
1069
1070define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
1071; CHECK-LABEL: @test_x86_sse2_psrl_d(
1072; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1073; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1074; CHECK-NEXT:    call void @llvm.donothing()
1075; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1076; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
1077; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1078; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
1079; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32>
1080; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
1081; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]]
1082; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
1083; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
1084; CHECK-NEXT:    ret <4 x i32> [[RES]]
1085;
1086  %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1087  ret <4 x i32> %res
1088}
1089declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
1090
1091
1092define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) #0 {
1093; CHECK-LABEL: @test_x86_sse2_psrl_q(
1094; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1095; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1096; CHECK-NEXT:    call void @llvm.donothing()
1097; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1098; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
1099; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1100; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
1101; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64>
1102; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
1103; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]]
1104; CHECK-NEXT:    [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]])
1105; CHECK-NEXT:    store <2 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
1106; CHECK-NEXT:    ret <2 x i64> [[RES]]
1107;
1108  %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1109  ret <2 x i64> %res
1110}
1111declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
1112
1113
1114define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
1115; CHECK-LABEL: @test_x86_sse2_psrl_w(
1116; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
1117; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1118; CHECK-NEXT:    call void @llvm.donothing()
1119; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1120; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
1121; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1122; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
1123; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16>
1124; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
1125; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]]
1126; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]])
1127; CHECK-NEXT:    store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
1128; CHECK-NEXT:    ret <8 x i16> [[RES]]
1129;
1130  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1131  ret <8 x i16> %res
1132}
1133declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
1134
1135
1136define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, ptr %p) #0 {
1137; CHECK-LABEL: @test_x86_sse2_psrl_w_load(
1138; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1139; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
1140; CHECK-NEXT:    call void @llvm.donothing()
1141; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1142; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1143; CHECK:       3:
1144; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1145; CHECK-NEXT:    unreachable
1146; CHECK:       4:
1147; CHECK-NEXT:    [[A1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
1148; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
1149; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
1150; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1151; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
1152; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
1153; CHECK-NEXT:    [[TMP9:%.*]] = trunc i128 [[TMP8]] to i64
1154; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
1155; CHECK-NEXT:    [[TMP11:%.*]] = sext i1 [[TMP10]] to i128
1156; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i128 [[TMP11]] to <8 x i16>
1157; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[TMP2]], <8 x i16> [[A1]])
1158; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i16> [[TMP13]], [[TMP12]]
1159; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]])
1160; CHECK-NEXT:    store <8 x i16> [[TMP14]], ptr @__msan_retval_tls, align 8
1161; CHECK-NEXT:    ret <8 x i16> [[RES]]
1162;
1163  %a1 = load <8 x i16>, ptr %p
1164  %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
1165  ret <8 x i16> %res
1166}
1167
1168
1169define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) #0 {
1170; CHECK-LABEL: @test_x86_sse2_psrli_d(
1171; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1172; CHECK-NEXT:    call void @llvm.donothing()
1173; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> [[TMP1]], i32 7)
1174; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
1175; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> [[A0:%.*]], i32 7)
1176; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
1177; CHECK-NEXT:    ret <4 x i32> [[RES]]
1178;
1179  %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
1180  ret <4 x i32> %res
1181}
1182declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
1183
1184
1185define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) #0 {
1186; CHECK-LABEL: @test_x86_sse2_psrli_q(
1187; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1188; CHECK-NEXT:    call void @llvm.donothing()
1189; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> [[TMP1]], i32 7)
1190; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP2]], zeroinitializer
1191; CHECK-NEXT:    [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> [[A0:%.*]], i32 7)
1192; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
1193; CHECK-NEXT:    ret <2 x i64> [[RES]]
1194;
1195  %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
1196  ret <2 x i64> %res
1197}
1198declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
1199
1200
1201define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) #0 {
1202; CHECK-LABEL: @test_x86_sse2_psrli_w(
1203; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
1204; CHECK-NEXT:    call void @llvm.donothing()
1205; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> [[TMP1]], i32 7)
1206; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
1207; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> [[A0:%.*]], i32 7)
1208; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
1209; CHECK-NEXT:    ret <8 x i16> [[RES]]
1210;
1211  %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
1212  ret <8 x i16> %res
1213}
1214declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
1215
1216
1217define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
1218; CHECK-LABEL: @test_x86_sse2_ucomieq_sd(
1219; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1220; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1221; CHECK-NEXT:    call void @llvm.donothing()
1222; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
1223; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
1224; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1225; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
1226; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
1227; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
1228; CHECK-NEXT:    ret i32 [[RES]]
1229;
1230  %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1231  ret i32 %res
1232}
1233declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
1234
1235
1236define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) #0 {
1237; CHECK-LABEL: @test_x86_sse2_ucomige_sd(
1238; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1239; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1240; CHECK-NEXT:    call void @llvm.donothing()
1241; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
1242; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
1243; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1244; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
1245; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
1246; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
1247; CHECK-NEXT:    ret i32 [[RES]]
1248;
1249  %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1250  ret i32 %res
1251}
1252declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
1253
1254
1255define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
1256; CHECK-LABEL: @test_x86_sse2_ucomigt_sd(
1257; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1258; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1259; CHECK-NEXT:    call void @llvm.donothing()
1260; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
1261; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
1262; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1263; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
1264; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
1265; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
1266; CHECK-NEXT:    ret i32 [[RES]]
1267;
1268  %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1269  ret i32 %res
1270}
1271declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
1272
1273
1274define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) #0 {
1275; CHECK-LABEL: @test_x86_sse2_ucomile_sd(
1276; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1277; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1278; CHECK-NEXT:    call void @llvm.donothing()
1279; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
1280; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
1281; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1282; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
1283; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
1284; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
1285; CHECK-NEXT:    ret i32 [[RES]]
1286;
1287  %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1288  ret i32 %res
1289}
1290declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
1291
1292
1293define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
1294; CHECK-LABEL: @test_x86_sse2_ucomilt_sd(
1295; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1296; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1297; CHECK-NEXT:    call void @llvm.donothing()
1298; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
1299; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
1300; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1301; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
1302; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
1303; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
1304; CHECK-NEXT:    ret i32 [[RES]]
1305;
1306  %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1307  ret i32 %res
1308}
1309declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
1310
1311
1312define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
1313; CHECK-LABEL: @test_x86_sse2_ucomineq_sd(
1314; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1315; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1316; CHECK-NEXT:    call void @llvm.donothing()
1317; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
1318; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
1319; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1320; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
1321; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
1322; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
1323; CHECK-NEXT:    ret i32 [[RES]]
1324;
1325  %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
1326  ret i32 %res
1327}
1328declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
1329
1330define void @test_x86_sse2_pause() #0 {
1331; CHECK-LABEL: @test_x86_sse2_pause(
1332; CHECK-NEXT:    call void @llvm.donothing()
1333; CHECK-NEXT:    tail call void @llvm.x86.sse2.pause()
1334; CHECK-NEXT:    ret void
1335;
1336  tail call void @llvm.x86.sse2.pause()
1337  ret void
1338}
1339declare void @llvm.x86.sse2.pause() nounwind
1340
1341define void @lfence() nounwind #0 {
1342; CHECK-LABEL: @lfence(
1343; CHECK-NEXT:    call void @llvm.donothing()
1344; CHECK-NEXT:    tail call void @llvm.x86.sse2.lfence()
1345; CHECK-NEXT:    ret void
1346;
1347  tail call void @llvm.x86.sse2.lfence()
1348  ret void
1349}
1350declare void @llvm.x86.sse2.lfence() nounwind
1351
1352define void @mfence() nounwind #0 {
1353; CHECK-LABEL: @mfence(
1354; CHECK-NEXT:    call void @llvm.donothing()
1355; CHECK-NEXT:    tail call void @llvm.x86.sse2.mfence()
1356; CHECK-NEXT:    ret void
1357;
1358  tail call void @llvm.x86.sse2.mfence()
1359  ret void
1360}
1361declare void @llvm.x86.sse2.mfence() nounwind
1362
1363define void @clflush(ptr %p) nounwind #0 {
1364; CHECK-LABEL: @clflush(
1365; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1366; CHECK-NEXT:    call void @llvm.donothing()
1367; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1368; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
1369; CHECK:       2:
1370; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1371; CHECK-NEXT:    unreachable
1372; CHECK:       3:
1373; CHECK-NEXT:    tail call void @llvm.x86.sse2.clflush(ptr [[P:%.*]])
1374; CHECK-NEXT:    ret void
1375;
1376  tail call void @llvm.x86.sse2.clflush(ptr %p)
1377  ret void
1378}
1379declare void @llvm.x86.sse2.clflush(ptr) nounwind
1380
1381attributes #0 = { sanitize_memory }
1382