xref: /llvm-project/llvm/test/Instrumentation/MemorySanitizer/X86/sse-intrinsics-x86.ll (revision 74acfa65efbbdce19a1d46d6868d4b16d1db73ad)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s
3
; Target configuration for the module under test.
; NOTE(review): the datalayout string uses "m:o" (Mach-O name mangling) although
; the triple below is x86_64 Linux, where "m:e" (ELF) would be expected. Confirm
; whether the mismatch is intentional before regenerating the assertions.
4target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-unknown-linux-gnu"
6
; Packed compare: llvm.x86.sse.cmp.ps with immediate predicate 7.
; Expected instrumentation: both operand shadows are loaded from
; @__msan_param_tls and OR-ed; each lane is tested against zero and
; sign-extended, so a lane of the return shadow is all-ones whenever either
; input lane has a nonzero shadow. No warning branch is expected.
7define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) #0 {
8; CHECK-LABEL: @test_x86_sse_cmp_ps(
9; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
10; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
11; CHECK-NEXT:    call void @llvm.donothing()
12; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
13; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer
14; CHECK-NEXT:    [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32>
15; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 7)
16; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8
17; CHECK-NEXT:    ret <4 x float> [[RES]]
18;
19  %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
20  ret <4 x float> %res
21}
22declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
23
24
; Scalar compare: llvm.x86.sse.cmp.ss with immediate predicate 7.
; Expected instrumentation: only lane 0 of the OR-ed operand shadows is
; inspected; the resulting i1 is sign-extended to i128 and bitcast to
; <4 x i32>, so the whole return shadow is either all-zeros or all-ones.
25define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) #0 {
26; CHECK-LABEL: @test_x86_sse_cmp_ss(
27; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
28; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
29; CHECK-NEXT:    call void @llvm.donothing()
30; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
31; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
32; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
33; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
34; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32>
35; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 7)
36; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8
37; CHECK-NEXT:    ret <4 x float> [[RES]]
38;
39  %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
40  ret <4 x float> %res
41}
42declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
43
44
; llvm.x86.sse.comieq.ss: scalar compare returning i32.
; Expected instrumentation: lane 0 of the OR-ed operand shadows is compared
; against zero and sign-extended to i32; that value is stored to
; @__msan_retval_tls as the return shadow. No warning branch is expected.
45define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) #0 {
46; CHECK-LABEL: @test_x86_sse_comieq_ss(
47; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
48; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
49; CHECK-NEXT:    call void @llvm.donothing()
50; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
51; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
52; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
53; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
54; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
55; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
56; CHECK-NEXT:    ret i32 [[RES]]
57;
58  %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
59  ret i32 %res
60}
61declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
62
63
; llvm.x86.sse.comige.ss: scalar compare returning i32.
; Expected instrumentation: the return shadow is lane 0 of the OR-ed operand
; shadows, reduced to "nonzero?" and sign-extended to i32. Shadow is propagated
; to @__msan_retval_tls rather than checked.
64define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) #0 {
65; CHECK-LABEL: @test_x86_sse_comige_ss(
66; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
67; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
68; CHECK-NEXT:    call void @llvm.donothing()
69; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
70; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
71; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
72; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
73; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.comige.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
74; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
75; CHECK-NEXT:    ret i32 [[RES]]
76;
77  %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
78  ret i32 %res
79}
80declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
81
82
; llvm.x86.sse.comigt.ss: scalar compare returning i32.
; Expected instrumentation: OR the two operand shadows, take lane 0, test it
; against zero and sign-extend the i1 to i32 to form the return shadow stored
; to @__msan_retval_tls. No warning branch is expected.
83define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) #0 {
84; CHECK-LABEL: @test_x86_sse_comigt_ss(
85; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
86; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
87; CHECK-NEXT:    call void @llvm.donothing()
88; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
89; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
90; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
91; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
92; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
93; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
94; CHECK-NEXT:    ret i32 [[RES]]
95;
96  %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
97  ret i32 %res
98}
99declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
100
101
; llvm.x86.sse.comile.ss: scalar compare returning i32.
; Expected instrumentation: the i32 return shadow is all-ones when lane 0 of
; the OR-ed operand shadows is nonzero and zero otherwise; it is stored to
; @__msan_retval_tls and no warning call is emitted.
102define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) #0 {
103; CHECK-LABEL: @test_x86_sse_comile_ss(
104; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
105; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
106; CHECK-NEXT:    call void @llvm.donothing()
107; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
108; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
109; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
110; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
111; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.comile.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
112; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
113; CHECK-NEXT:    ret i32 [[RES]]
114;
115  %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
116  ret i32 %res
117}
118declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
119
120
; llvm.x86.sse.comilt.ss: scalar compare returning i32.
; Expected instrumentation: operand shadows are OR-ed, lane 0 is extracted,
; compared against zero and sign-extended to i32; that value becomes the
; return shadow in @__msan_retval_tls. No warning branch is expected.
121define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) #0 {
122; CHECK-LABEL: @test_x86_sse_comilt_ss(
123; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
124; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
125; CHECK-NEXT:    call void @llvm.donothing()
126; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
127; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
128; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
129; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
130; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
131; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
132; CHECK-NEXT:    ret i32 [[RES]]
133;
134  %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
135  ret i32 %res
136}
137declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
138
139
; llvm.x86.sse.comineq.ss: scalar compare returning i32.
; Expected instrumentation: same shape as the other comi* tests in this file:
; lane 0 of the OR-ed operand shadows is turned into an all-zeros/all-ones i32
; return shadow stored to @__msan_retval_tls, with no warning branch.
140define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) #0 {
141; CHECK-LABEL: @test_x86_sse_comineq_ss(
142; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
143; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
144; CHECK-NEXT:    call void @llvm.donothing()
145; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
146; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
147; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
148; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
149; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
150; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
151; CHECK-NEXT:    ret i32 [[RES]]
152;
153  %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
154  ret i32 %res
155}
156declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
157
158
; llvm.x86.sse.cvtss2si: conversion returning i32.
; Expected instrumentation is a strict check rather than propagation: lane 0 of
; the argument shadow is tested, a nonzero value branches to
; @__msan_warning_noreturn, and on the clean path the intrinsic is called and a
; constant zero return shadow is stored. This is the first function whose
; expected output defines the PROF0 and ATTR5 pattern variables reused by later
; tests in this file.
159define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) #0 {
160; CHECK-LABEL: @test_x86_sse_cvtss2si(
161; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
162; CHECK-NEXT:    call void @llvm.donothing()
163; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
164; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0
165; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]]
166; CHECK:       3:
167; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
168; CHECK-NEXT:    unreachable
169; CHECK:       4:
170; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[A0:%.*]])
171; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
172; CHECK-NEXT:    ret i32 [[RES]]
173;
174  %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
175  ret i32 %res
176}
177declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
178
179
; llvm.x86.sse.cvttss2si: conversion returning i32.
; Expected instrumentation: lane 0 of the argument shadow is checked; if it is
; nonzero, control reaches @__msan_warning_noreturn, otherwise the intrinsic
; runs and a constant zero return shadow is stored. The PROF0 and ATTR5 pattern
; variables are reused here, not redefined.
180define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) #0 {
181; CHECK-LABEL: @test_x86_sse_cvttss2si(
182; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
183; CHECK-NEXT:    call void @llvm.donothing()
184; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
185; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0
186; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
187; CHECK:       3:
188; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
189; CHECK-NEXT:    unreachable
190; CHECK:       4:
191; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[A0:%.*]])
192; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
193; CHECK-NEXT:    ret i32 [[RES]]
194;
195  %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
196  ret i32 %res
197}
198declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
199
200
; llvm.x86.sse.ldmxcsr takes a pointer argument.
; Expected instrumentation: the shadow address for %a0 is formed by XOR-ing the
; pointer value with 87960930222080 (0x500000000000), an i32 shadow is loaded
; from that address, and a warning is raised if either the pointer's own
; parameter shadow or the loaded i32 shadow is nonzero. Only when both are
; clean is the intrinsic called.
201define void @test_x86_sse_ldmxcsr(ptr %a0) #0 {
202; CHECK-LABEL: @test_x86_sse_ldmxcsr(
203; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
204; CHECK-NEXT:    call void @llvm.donothing()
205; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[A0:%.*]] to i64
206; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
207; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
208; CHECK-NEXT:    [[_LDMXCSR:%.*]] = load i32, ptr [[TMP4]], align 1
209; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
210; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_LDMXCSR]], 0
211; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
212; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
213; CHECK:       5:
214; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
215; CHECK-NEXT:    unreachable
216; CHECK:       6:
217; CHECK-NEXT:    call void @llvm.x86.sse.ldmxcsr(ptr [[A0]])
218; CHECK-NEXT:    ret void
219;
220  call void @llvm.x86.sse.ldmxcsr(ptr %a0)
221  ret void
222}
223declare void @llvm.x86.sse.ldmxcsr(ptr) nounwind
224
225
226
; Packed maximum: llvm.x86.sse.max.ps.
; Expected instrumentation: the return shadow is simply the lane-wise OR of the
; two operand shadows, stored to @__msan_retval_tls. No warning branch is
; expected.
227define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) #0 {
228; CHECK-LABEL: @test_x86_sse_max_ps(
229; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
230; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
231; CHECK-NEXT:    call void @llvm.donothing()
232; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
233; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
234; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
235; CHECK-NEXT:    ret <4 x float> [[RES]]
236;
237  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
238  ret <4 x float> %res
239}
240declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
241
242
; Scalar maximum: llvm.x86.sse.max.ss.
; Expected instrumentation: a shufflevector builds the return shadow from lane 0
; of the OR-ed operand shadows (mask index 4) and lanes 1-3 of the first
; operand's shadow (mask indices 1, 2, 3).
243define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) #0 {
244; CHECK-LABEL: @test_x86_sse_max_ss(
245; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
246; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
247; CHECK-NEXT:    call void @llvm.donothing()
248; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
249; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
250; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
251; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
252; CHECK-NEXT:    ret <4 x float> [[RES]]
253;
254  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
255  ret <4 x float> %res
256}
257declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
258
259
; Packed minimum: llvm.x86.sse.min.ps.
; Expected instrumentation: the two operand shadows are OR-ed lane-wise and the
; result is stored to @__msan_retval_tls as the return shadow. No warning
; branch is expected.
260define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) #0 {
261; CHECK-LABEL: @test_x86_sse_min_ps(
262; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
263; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
264; CHECK-NEXT:    call void @llvm.donothing()
265; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
266; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
267; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
268; CHECK-NEXT:    ret <4 x float> [[RES]]
269;
270  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
271  ret <4 x float> %res
272}
273declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
274
275
; Scalar minimum: llvm.x86.sse.min.ss.
; Expected instrumentation: the return shadow takes lane 0 from the OR of both
; operand shadows (shuffle mask index 4) and keeps lanes 1-3 of the first
; operand's shadow unchanged.
276define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) #0 {
277; CHECK-LABEL: @test_x86_sse_min_ss(
278; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
279; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
280; CHECK-NEXT:    call void @llvm.donothing()
281; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
282; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
283; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
284; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
285; CHECK-NEXT:    ret <4 x float> [[RES]]
286;
287  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
288  ret <4 x float> %res
289}
290declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
291
292
; llvm.x86.sse.movmsk.ps: vector-to-i32 mask extraction.
; Expected instrumentation is a strict check over the whole argument: the
; <4 x i32> shadow is bitcast to i128 and compared against zero; any nonzero bit
; branches to @__msan_warning_noreturn, otherwise the intrinsic is called and a
; constant zero return shadow is stored.
293define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) #0 {
294; CHECK-LABEL: @test_x86_sse_movmsk_ps(
295; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
296; CHECK-NEXT:    call void @llvm.donothing()
297; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
298; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
299; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
300; CHECK:       3:
301; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
302; CHECK-NEXT:    unreachable
303; CHECK:       4:
304; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> [[A0:%.*]])
305; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
306; CHECK-NEXT:    ret i32 [[RES]]
307;
308  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
309  ret i32 %res
310}
311declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
312
313
314
; Packed reciprocal estimate: llvm.x86.sse.rcp.ps.
; Expected instrumentation: the argument shadow loaded from @__msan_param_tls is
; stored unchanged to @__msan_retval_tls; no extra shadow arithmetic and no
; warning branch are expected.
315define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) #0 {
316; CHECK-LABEL: @test_x86_sse_rcp_ps(
317; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
318; CHECK-NEXT:    call void @llvm.donothing()
319; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> [[A0:%.*]])
320; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8
321; CHECK-NEXT:    ret <4 x float> [[RES]]
322;
323  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
324  ret <4 x float> %res
325}
326declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
327
328
; Scalar reciprocal estimate: llvm.x86.sse.rcp.ss.
; Expected instrumentation: the return shadow is the argument shadow passed
; through verbatim (all four lanes), stored to @__msan_retval_tls.
329define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) #0 {
330; CHECK-LABEL: @test_x86_sse_rcp_ss(
331; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
332; CHECK-NEXT:    call void @llvm.donothing()
333; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[A0:%.*]])
334; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8
335; CHECK-NEXT:    ret <4 x float> [[RES]]
336;
337  %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
338  ret <4 x float> %res
339}
340declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
341
342
; Packed reciprocal square-root estimate: llvm.x86.sse.rsqrt.ps.
; Expected instrumentation: the argument shadow is forwarded unchanged to
; @__msan_retval_tls; no warning branch is expected.
343define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) #0 {
344; CHECK-LABEL: @test_x86_sse_rsqrt_ps(
345; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
346; CHECK-NEXT:    call void @llvm.donothing()
347; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> [[A0:%.*]])
348; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8
349; CHECK-NEXT:    ret <4 x float> [[RES]]
350;
351  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
352  ret <4 x float> %res
353}
354declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
355
356
; Scalar reciprocal square-root estimate: llvm.x86.sse.rsqrt.ss.
; Expected instrumentation: the full <4 x i32> argument shadow is stored as the
; return shadow without modification.
357define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) #0 {
358; CHECK-LABEL: @test_x86_sse_rsqrt_ss(
359; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
360; CHECK-NEXT:    call void @llvm.donothing()
361; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[A0:%.*]])
362; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8
363; CHECK-NEXT:    ret <4 x float> [[RES]]
364;
365  %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
366  ret <4 x float> %res
367}
368declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
369
370
; llvm.x86.sse.stmxcsr takes a pointer argument.
; Expected instrumentation: the shadow address for %a0 is computed by XOR-ing
; the pointer with 87960930222080 (0x500000000000) and an i32 zero is stored
; there; afterwards the pointer's own parameter shadow is checked and a nonzero
; value branches to @__msan_warning_noreturn before the intrinsic call.
; Note the shadow store is emitted ahead of the pointer-shadow check.
371define void @test_x86_sse_stmxcsr(ptr %a0) #0 {
372; CHECK-LABEL: @test_x86_sse_stmxcsr(
373; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
374; CHECK-NEXT:    call void @llvm.donothing()
375; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[A0:%.*]] to i64
376; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
377; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
378; CHECK-NEXT:    store i32 0, ptr [[TMP4]], align 4
379; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
380; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
381; CHECK:       5:
382; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
383; CHECK-NEXT:    unreachable
384; CHECK:       6:
385; CHECK-NEXT:    call void @llvm.x86.sse.stmxcsr(ptr [[A0]])
386; CHECK-NEXT:    ret void
387;
388  call void @llvm.x86.sse.stmxcsr(ptr %a0)
389  ret void
390}
391declare void @llvm.x86.sse.stmxcsr(ptr) nounwind
392
393
; llvm.x86.sse.ucomieq.ss: scalar compare returning i32.
; Expected instrumentation: lane 0 of the OR-ed operand shadows is compared
; against zero and sign-extended to i32; the result is stored to
; @__msan_retval_tls as the return shadow. No warning branch is expected.
394define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) #0 {
395; CHECK-LABEL: @test_x86_sse_ucomieq_ss(
396; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
397; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
398; CHECK-NEXT:    call void @llvm.donothing()
399; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
400; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
401; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
402; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
403; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
404; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
405; CHECK-NEXT:    ret i32 [[RES]]
406;
407  %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
408  ret i32 %res
409}
410declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
411
412
; llvm.x86.sse.ucomige.ss: scalar compare returning i32.
; Expected instrumentation: the return shadow is derived from lane 0 of the
; OR-ed operand shadows (nonzero test, then sign-extension to i32) and stored
; to @__msan_retval_tls; shadow is propagated, not checked.
413define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) #0 {
414; CHECK-LABEL: @test_x86_sse_ucomige_ss(
415; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
416; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
417; CHECK-NEXT:    call void @llvm.donothing()
418; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
419; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
420; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
421; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
422; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
423; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
424; CHECK-NEXT:    ret i32 [[RES]]
425;
426  %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
427  ret i32 %res
428}
429declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
430
431
; llvm.x86.sse.ucomigt.ss: scalar compare returning i32.
; Expected instrumentation: OR the operand shadows, extract lane 0, test it
; against zero and sign-extend to i32 to produce the return shadow stored to
; @__msan_retval_tls. No warning branch is expected.
432define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) #0 {
433; CHECK-LABEL: @test_x86_sse_ucomigt_ss(
434; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
435; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
436; CHECK-NEXT:    call void @llvm.donothing()
437; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
438; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
439; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
440; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
441; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
442; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
443; CHECK-NEXT:    ret i32 [[RES]]
444;
445  %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
446  ret i32 %res
447}
448declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
449
450
; llvm.x86.sse.ucomile.ss: scalar compare returning i32.
; Expected instrumentation: the i32 return shadow is all-ones when lane 0 of
; the OR-ed operand shadows is nonzero and zero otherwise; it is stored to
; @__msan_retval_tls with no warning call.
451define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) #0 {
452; CHECK-LABEL: @test_x86_sse_ucomile_ss(
453; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
454; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
455; CHECK-NEXT:    call void @llvm.donothing()
456; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
457; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
458; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
459; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
460; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
461; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
462; CHECK-NEXT:    ret i32 [[RES]]
463;
464  %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
465  ret i32 %res
466}
467declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
468
469
; llvm.x86.sse.ucomilt.ss: scalar compare returning i32.
; Expected instrumentation: operand shadows are OR-ed, lane 0 is extracted,
; compared against zero and sign-extended to i32; that value becomes the
; return shadow in @__msan_retval_tls. No warning branch is expected.
470define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) #0 {
471; CHECK-LABEL: @test_x86_sse_ucomilt_ss(
472; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
473; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
474; CHECK-NEXT:    call void @llvm.donothing()
475; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
476; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
477; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
478; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
479; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
480; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
481; CHECK-NEXT:    ret i32 [[RES]]
482;
483  %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
484  ret i32 %res
485}
486declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
487
488
; llvm.x86.sse.ucomineq.ss: scalar compare returning i32.
; Expected instrumentation: same shape as the other ucomi* tests in this file:
; lane 0 of the OR-ed operand shadows is turned into an all-zeros/all-ones i32
; return shadow stored to @__msan_retval_tls, with no warning branch.
489define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) #0 {
490; CHECK-LABEL: @test_x86_sse_ucomineq_ss(
491; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
492; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
493; CHECK-NEXT:    call void @llvm.donothing()
494; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
495; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
496; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
497; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
498; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
499; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
500; CHECK-NEXT:    ret i32 [[RES]]
501;
502  %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
503  ret i32 %res
504}
505declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
506
507
; llvm.x86.sse.sfence takes no operands and returns nothing.
; Unlike the other tests in this file, this function is declared with plain
; `nounwind` rather than attribute group #0. The expected output only gains the
; @llvm.donothing() call; the tail call to the intrinsic is left as-is and no
; shadow loads, stores or warning branches are expected.
508define void @sfence() nounwind {
509; CHECK-LABEL: @sfence(
510; CHECK-NEXT:    call void @llvm.donothing()
511; CHECK-NEXT:    tail call void @llvm.x86.sse.sfence()
512; CHECK-NEXT:    ret void
513;
514  tail call void @llvm.x86.sse.sfence()
515  ret void
516}
517declare void @llvm.x86.sse.sfence() nounwind
518
; Attribute group referenced as #0 by every test_x86_sse_* function in this
; file; @sfence does not use it.
519attributes #0 = { sanitize_memory }
520