xref: /llvm-project/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll (revision 95e44d3670f402e0cb9b78fa3cce20d8edc1ac77)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes
2; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s
3
4target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-unknown-linux-gnu"
6
; Exercises MSan instrumentation of @llvm.x86.sse41.blendvpd (variable blend,
; <2 x double>). The expected output loads three parameter shadows from
; @__msan_param_tls (offsets 0, 16 and 32), derives a per-lane i1 condition
; from the sign bit (ashr by 63, then trunc) of both %a2 and the shadow loaded
; from offset 32, and combines them into [[_MSPROP_SELECT]], which is stored
; to @__msan_retval_tls. The intrinsic call itself is left unchanged.
7define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
8; CHECK-LABEL: @test_x86_sse41_blendvpd(
9; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
10; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
11; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
12; CHECK-NEXT:    call void @llvm.donothing()
13; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x double> [[A2:%.*]] to <2 x i64>
14; CHECK-NEXT:    [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], splat (i64 63)
15; CHECK-NEXT:    [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i1>
16; CHECK-NEXT:    [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 63)
17; CHECK-NEXT:    [[TMP8:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i1>
18; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]]
19; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x double> [[A1:%.*]] to <2 x i64>
20; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x double> [[A0:%.*]] to <2 x i64>
21; CHECK-NEXT:    [[TMP12:%.*]] = xor <2 x i64> [[TMP10]], [[TMP11]]
22; CHECK-NEXT:    [[TMP13:%.*]] = or <2 x i64> [[TMP12]], [[TMP2]]
23; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i64> [[TMP13]], [[TMP3]]
24; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <2 x i1> [[TMP8]], <2 x i64> [[TMP14]], <2 x i64> [[TMP9]]
25; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]])
26; CHECK-NEXT:    store <2 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
27; CHECK-NEXT:    ret <2 x double> [[RES]]
28;
29  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
30  ret <2 x double> %res
31}
32declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
33
34
; Exercises MSan instrumentation of @llvm.x86.sse41.blendvps (variable blend,
; <4 x float>). Same expected shape as the blendvpd test, but on <4 x i32>
; shadows: the per-lane conditions come from the sign bit (ashr by 31, then
; trunc) of %a2 and of the shadow loaded from offset 32 of @__msan_param_tls,
; and the resulting [[_MSPROP_SELECT]] is stored to @__msan_retval_tls.
35define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
36; CHECK-LABEL: @test_x86_sse41_blendvps(
37; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
38; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
39; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
40; CHECK-NEXT:    call void @llvm.donothing()
41; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x float> [[A2:%.*]] to <4 x i32>
42; CHECK-NEXT:    [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], splat (i32 31)
43; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i1>
44; CHECK-NEXT:    [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31)
45; CHECK-NEXT:    [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1>
46; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
47; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x float> [[A1:%.*]] to <4 x i32>
48; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x float> [[A0:%.*]] to <4 x i32>
49; CHECK-NEXT:    [[TMP12:%.*]] = xor <4 x i32> [[TMP10]], [[TMP11]]
50; CHECK-NEXT:    [[TMP13:%.*]] = or <4 x i32> [[TMP12]], [[TMP2]]
51; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP13]], [[TMP3]]
52; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP14]], <4 x i32> [[TMP9]]
53; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]])
54; CHECK-NEXT:    store <4 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
55; CHECK-NEXT:    ret <4 x float> [[RES]]
56;
57  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
58  ret <4 x float> %res
59}
60declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
61
62
; Exercises MSan instrumentation of @llvm.x86.sse41.dppd called with the
; immediate i8 -18. The expected shadow computation ORs both operand shadows,
; keeps only the lanes selected by <i1 false, i1 true>, OR-reduces them to a
; scalar, and compares it with 0; when the reduction is zero the result shadow
; is all-zero, otherwise the lanes <i1 false, i1 true> are sign-extended to
; form the shadow stored to @__msan_retval_tls.
63define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) #0 {
64; CHECK-LABEL: @test_x86_sse41_dppd(
65; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
66; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
67; CHECK-NEXT:    call void @llvm.donothing()
68; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
69; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> <i1 false, i1 true>, <2 x i64> [[TMP3]], <2 x i64> zeroinitializer
70; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP4]])
71; CHECK-NEXT:    [[_MSDPP:%.*]] = icmp eq i64 [[TMP5]], 0
72; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[_MSDPP]], <2 x i1> zeroinitializer, <2 x i1> <i1 false, i1 true>
73; CHECK-NEXT:    [[_MSDPP1:%.*]] = sext <2 x i1> [[TMP6]] to <2 x i64>
74; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 -18)
75; CHECK-NEXT:    store <2 x i64> [[_MSDPP1]], ptr @__msan_retval_tls, align 8
76; CHECK-NEXT:    ret <2 x double> [[RES]]
77;
78  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 -18) ; <<2 x double>> [#uses=1]
79  ret <2 x double> %res
80}
81declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
82
83
; Exercises MSan instrumentation of @llvm.x86.sse41.dpps called with the
; immediate i8 -18. The expected shadow computation mirrors the dppd test on
; <4 x i32> shadows: OR the operand shadows, keep the lanes selected by
; <i1 false, i1 true, i1 true, i1 true>, OR-reduce, compare with 0, and then
; either produce an all-zero shadow or sign-extend the lane mask
; <i1 false, i1 true, i1 true, i1 true> as the shadow stored to
; @__msan_retval_tls.
84define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) #0 {
85; CHECK-LABEL: @test_x86_sse41_dpps(
86; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
87; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
88; CHECK-NEXT:    call void @llvm.donothing()
89; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
90; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
91; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]])
92; CHECK-NEXT:    [[_MSDPP:%.*]] = icmp eq i32 [[TMP5]], 0
93; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[_MSDPP]], <4 x i1> zeroinitializer, <4 x i1> <i1 false, i1 true, i1 true, i1 true>
94; CHECK-NEXT:    [[_MSDPP1:%.*]] = sext <4 x i1> [[TMP6]] to <4 x i32>
95; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 -18)
96; CHECK-NEXT:    store <4 x i32> [[_MSDPP1]], ptr @__msan_retval_tls, align 8
97; CHECK-NEXT:    ret <4 x float> [[RES]]
98;
99  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 -18) ; <<4 x float>> [#uses=1]
100  ret <4 x float> %res
101}
102declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
103
104
; Exercises MSan instrumentation of @llvm.x86.sse41.insertps with immediate
; i8 17. The expected output shows no shadow propagation for this intrinsic:
; each operand shadow is bitcast to i128 and compared against 0, the two
; results are OR'ed, and a non-zero shadow branches to a block that calls
; @__msan_warning_noreturn. On the fall-through path the intrinsic is called
; and a zeroinitializer shadow is stored to @__msan_retval_tls.
105define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) #0 {
106; CHECK-LABEL: @test_x86_sse41_insertps(
107; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
108; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
109; CHECK-NEXT:    call void @llvm.donothing()
110; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
111; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
112; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
113; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
114; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
115; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1:![0-9]+]]
116; CHECK:       5:
117; CHECK-NEXT:    call void @__msan_warning_noreturn()
118; CHECK-NEXT:    unreachable
119; CHECK:       6:
120; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i8 17)
121; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
122; CHECK-NEXT:    ret <4 x float> [[RES]]
123;
124  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) ; <<4 x float>> [#uses=1]
125  ret <4 x float> %res
126}
127declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
128
129
130
; Exercises MSan instrumentation of @llvm.x86.sse41.mpsadbw with immediate
; i8 7. The expected output checks both <16 x i8> operand shadows eagerly
; (bitcast to i128, compare against 0, OR the results) and branches to a
; block calling @__msan_warning_noreturn when either is non-zero. Otherwise
; the intrinsic is called and a zeroinitializer <8 x i16> shadow is stored to
; @__msan_retval_tls.
131define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) #0 {
132; CHECK-LABEL: @test_x86_sse41_mpsadbw(
133; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
134; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
135; CHECK-NEXT:    call void @llvm.donothing()
136; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
137; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
138; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
139; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
140; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
141; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
142; CHECK:       5:
143; CHECK-NEXT:    call void @__msan_warning_noreturn()
144; CHECK-NEXT:    unreachable
145; CHECK:       6:
146; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]], i8 7)
147; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
148; CHECK-NEXT:    ret <8 x i16> [[RES]]
149;
150  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
151  ret <8 x i16> %res
152}
153declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
154
; Variant of the mpsadbw test where the first operand is loaded from %ptr.
; The expected output contains two checks: first the i64 shadow of the pointer
; parameter is compared against 0 before the load; then the shadow of the
; loaded value (read from the address formed by xor'ing the pointer with
; 87960930222080) and the shadow of %a1 are checked together. Either failing
; check branches to @__msan_warning_noreturn; otherwise the intrinsic is
; called and a zeroinitializer shadow is stored to @__msan_retval_tls.
155define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(ptr %ptr, <16 x i8> %a1) #0 {
156; CHECK-LABEL: @test_x86_sse41_mpsadbw_load_op0(
157; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
158; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
159; CHECK-NEXT:    call void @llvm.donothing()
160; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
161; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
162; CHECK:       3:
163; CHECK-NEXT:    call void @__msan_warning_noreturn()
164; CHECK-NEXT:    unreachable
165; CHECK:       4:
166; CHECK-NEXT:    [[A0:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 16
167; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
168; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
169; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
170; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
171; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128
172; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
173; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
174; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
175; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
176; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
177; CHECK:       10:
178; CHECK-NEXT:    call void @__msan_warning_noreturn()
179; CHECK-NEXT:    unreachable
180; CHECK:       11:
181; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> [[A0]], <16 x i8> [[A1:%.*]], i8 7)
182; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
183; CHECK-NEXT:    ret <8 x i16> [[RES]]
184;
185  %a0 = load <16 x i8>, ptr %ptr
186  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
187  ret <8 x i16> %res
188}
189
; Exercises MSan instrumentation of @llvm.x86.sse41.packusdw. The expected
; shadow computation turns each <4 x i32> operand shadow into a per-lane
; all-ones/all-zero value (icmp ne against zeroinitializer, then sext) and
; packs the two results with @llvm.x86.sse2.packssdw.128; that packed value
; ([[_MSPROP_VECTOR_PACK]]) is stored to @__msan_retval_tls.
190define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) #0 {
191; CHECK-LABEL: @test_x86_sse41_packusdw(
192; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
193; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
194; CHECK-NEXT:    call void @llvm.donothing()
195; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer
196; CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
197; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
198; CHECK-NEXT:    [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
199; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP4]], <4 x i32> [[TMP6]])
200; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
201; CHECK-NEXT:    store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
202; CHECK-NEXT:    ret <8 x i16> [[RES]]
203;
204  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
205  ret <8 x i16> %res
206}
207declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
208
209
; Exercises @llvm.x86.sse41.packusdw with two constant operands and no
; function parameters. The expected output loads no parameter shadows: the
; shadow is the result of @llvm.x86.sse2.packssdw.128 applied to two
; zeroinitializer vectors, and the packusdw call keeps its original constant
; operands.
210define <8 x i16> @test_x86_sse41_packusdw_fold() #0 {
211; CHECK-LABEL: @test_x86_sse41_packusdw_fold(
212; CHECK-NEXT:    call void @llvm.donothing()
213; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer)
214; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
215; CHECK-NEXT:    store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
216; CHECK-NEXT:    ret <8 x i16> [[RES]]
217;
218  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
219  ret <8 x i16> %res
220}
221
222
; Exercises MSan instrumentation of @llvm.x86.sse41.pblendvb (variable byte
; blend). Same expected shape as the blendvpd/blendvps tests, but directly on
; <16 x i8> values with no bitcasts: per-lane conditions come from the sign
; bit (ashr by 7, then trunc) of %a2 and of the shadow loaded from offset 32
; of @__msan_param_tls, and [[_MSPROP_SELECT]] is stored to
; @__msan_retval_tls.
223define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) #0 {
224; CHECK-LABEL: @test_x86_sse41_pblendvb(
225; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
226; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
227; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
228; CHECK-NEXT:    call void @llvm.donothing()
229; CHECK-NEXT:    [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], splat (i8 7)
230; CHECK-NEXT:    [[TMP5:%.*]] = trunc <16 x i8> [[TMP4]] to <16 x i1>
231; CHECK-NEXT:    [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], splat (i8 7)
232; CHECK-NEXT:    [[TMP7:%.*]] = trunc <16 x i8> [[TMP6]] to <16 x i1>
233; CHECK-NEXT:    [[TMP8:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]]
234; CHECK-NEXT:    [[TMP9:%.*]] = xor <16 x i8> [[A1:%.*]], [[A0:%.*]]
235; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i8> [[TMP9]], [[TMP2]]
236; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i8> [[TMP10]], [[TMP3]]
237; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> [[TMP11]], <16 x i8> [[TMP8]]
238; CHECK-NEXT:    [[RES:%.*]] = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[A0]], <16 x i8> [[A1]], <16 x i8> [[A2]])
239; CHECK-NEXT:    store <16 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
240; CHECK-NEXT:    ret <16 x i8> [[RES]]
241;
242  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
243  ret <16 x i8> %res
244}
245declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
246
247
; Exercises MSan instrumentation of @llvm.x86.sse41.phminposuw. In the
; expected output the <8 x i16> parameter shadow loaded from
; @__msan_param_tls is stored unchanged to @__msan_retval_tls; no extra
; shadow arithmetic or checks are emitted around the intrinsic call.
248define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) #0 {
249; CHECK-LABEL: @test_x86_sse41_phminposuw(
250; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
251; CHECK-NEXT:    call void @llvm.donothing()
252; CHECK-NEXT:    [[RES:%.*]] = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> [[A0:%.*]])
253; CHECK-NEXT:    store <8 x i16> [[TMP1]], ptr @__msan_retval_tls, align 8
254; CHECK-NEXT:    ret <8 x i16> [[RES]]
255;
256  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
257  ret <8 x i16> %res
258}
259declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
260
261
; Exercises MSan instrumentation of @llvm.x86.sse41.ptestc. The expected i32
; result shadow is built by OR'ing the two <2 x i64> operand shadows,
; comparing each lane against zero, bitcasting the <2 x i1> result to i2 and
; zero-extending it to i32 before storing it to @__msan_retval_tls.
262define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) #0 {
263; CHECK-LABEL: @test_x86_sse41_ptestc(
264; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
265; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
266; CHECK-NEXT:    call void @llvm.donothing()
267; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
268; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
269; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2
270; CHECK-NEXT:    [[TMP6:%.*]] = zext i2 [[TMP5]] to i32
271; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestc(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]])
272; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
273; CHECK-NEXT:    ret i32 [[RES]]
274;
275  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
276  ret i32 %res
277}
278declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
279
280
; Exercises MSan instrumentation of @llvm.x86.sse41.ptestnzc. The expected
; shadow computation is the same as for the ptestc test: OR of the operand
; shadows, per-lane icmp ne against zero, bitcast of the <2 x i1> result to
; i2, zext to i32, then a store to @__msan_retval_tls.
281define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) #0 {
282; CHECK-LABEL: @test_x86_sse41_ptestnzc(
283; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
284; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
285; CHECK-NEXT:    call void @llvm.donothing()
286; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
287; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
288; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2
289; CHECK-NEXT:    [[TMP6:%.*]] = zext i2 [[TMP5]] to i32
290; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]])
291; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
292; CHECK-NEXT:    ret i32 [[RES]]
293;
294  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
295  ret i32 %res
296}
297declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
298
299
; Exercises MSan instrumentation of @llvm.x86.sse41.ptestz. The expected
; shadow computation is the same as for the ptestc and ptestnzc tests: OR of
; the operand shadows, per-lane icmp ne against zero, bitcast of the
; <2 x i1> result to i2, zext to i32, then a store to @__msan_retval_tls.
300define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) #0 {
301; CHECK-LABEL: @test_x86_sse41_ptestz(
302; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
303; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
304; CHECK-NEXT:    call void @llvm.donothing()
305; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
306; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
307; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2
308; CHECK-NEXT:    [[TMP6:%.*]] = zext i2 [[TMP5]] to i32
309; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse41.ptestz(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]])
310; CHECK-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
311; CHECK-NEXT:    ret i32 [[RES]]
312;
313  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
314  ret i32 %res
315}
316declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
317
318
; Exercises MSan instrumentation of @llvm.x86.sse41.round.pd with immediate
; i32 7. In the expected output the <2 x i64> parameter shadow loaded from
; @__msan_param_tls is stored unchanged to @__msan_retval_tls.
319define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) #0 {
320; CHECK-LABEL: @test_x86_sse41_round_pd(
321; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
322; CHECK-NEXT:    call void @llvm.donothing()
323; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> [[A0:%.*]], i32 7)
324; CHECK-NEXT:    store <2 x i64> [[TMP1]], ptr @__msan_retval_tls, align 8
325; CHECK-NEXT:    ret <2 x double> [[RES]]
326;
327  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
328  ret <2 x double> %res
329}
330declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
331
332
; Exercises MSan instrumentation of @llvm.x86.sse41.round.ps with immediate
; i32 7. In the expected output the <4 x i32> parameter shadow loaded from
; @__msan_param_tls is stored unchanged to @__msan_retval_tls.
333define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) #0 {
334; CHECK-LABEL: @test_x86_sse41_round_ps(
335; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
336; CHECK-NEXT:    call void @llvm.donothing()
337; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> [[A0:%.*]], i32 7)
338; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8
339; CHECK-NEXT:    ret <4 x float> [[RES]]
340;
341  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
342  ret <4 x float> %res
343}
344declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
345
346
; Exercises MSan instrumentation of @llvm.x86.sse41.round.sd with immediate
; i32 7. The expected result shadow is a shufflevector of the two operand
; shadows with mask <i32 2, i32 1>: lane 0 is taken from the shadow loaded at
; offset 16 of @__msan_param_tls and lane 1 from the shadow loaded at
; offset 0. That value is stored to @__msan_retval_tls.
347define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) #0 {
348; CHECK-LABEL: @test_x86_sse41_round_sd(
349; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
350; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
351; CHECK-NEXT:    call void @llvm.donothing()
352; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i32> <i32 2, i32 1>
353; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 7)
354; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
355; CHECK-NEXT:    ret <2 x double> [[RES]]
356;
357  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
358  ret <2 x double> %res
359}
360declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
361
362
; Variant of the round.sd test where the second operand is loaded from the
; pointer %a1. The expected output first checks the i64 shadow of the pointer
; parameter (branching to @__msan_warning_noreturn when non-zero), then loads
; the value and its shadow (the latter from the address formed by xor'ing the
; pointer with 87960930222080). The result shadow is a shufflevector with mask
; <i32 2, i32 1> of the %a0 shadow and the loaded shadow, stored to
; @__msan_retval_tls.
363define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, ptr %a1) #0 {
364; CHECK-LABEL: @test_x86_sse41_round_sd_load(
365; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
366; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
367; CHECK-NEXT:    call void @llvm.donothing()
368; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
369; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
370; CHECK:       3:
371; CHECK-NEXT:    call void @__msan_warning_noreturn()
372; CHECK-NEXT:    unreachable
373; CHECK:       4:
374; CHECK-NEXT:    [[A1B:%.*]] = load <2 x double>, ptr [[A1:%.*]], align 16
375; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A1]] to i64
376; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
377; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
378; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
379; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[_MSLD]], <2 x i32> <i32 2, i32 1>
380; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1B]], i32 7)
381; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
382; CHECK-NEXT:    ret <2 x double> [[RES]]
383;
384  %a1b = load <2 x double>, ptr %a1
385  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1]
386  ret <2 x double> %res
387}
388
389
; Exercises MSan instrumentation of @llvm.x86.sse41.round.ss (immediate i32 7)
; with the second operand loaded from the pointer %a1. The expected output
; checks the i64 shadow of the pointer parameter (branching to
; @__msan_warning_noreturn when non-zero), loads the value and its shadow
; (the latter from the address formed by xor'ing the pointer with
; 87960930222080), and builds the result shadow as a shufflevector with mask
; <i32 4, i32 1, i32 2, i32 3>: lane 0 from the loaded shadow, lanes 1-3 from
; the %a0 shadow. That value is stored to @__msan_retval_tls.
390define <4 x float> @test_x86_sse41_round_ss_load(<4 x float> %a0, ptr %a1) #0 {
391; CHECK-LABEL: @test_x86_sse41_round_ss_load(
392; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
393; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
394; CHECK-NEXT:    call void @llvm.donothing()
395; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
396; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
397; CHECK:       3:
398; CHECK-NEXT:    call void @__msan_warning_noreturn()
399; CHECK-NEXT:    unreachable
400; CHECK:       4:
401; CHECK-NEXT:    [[A1B:%.*]] = load <4 x float>, ptr [[A1:%.*]], align 16
402; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A1]] to i64
403; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
404; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
405; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
406; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[_MSLD]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
407; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1B]], i32 7)
408; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
409; CHECK-NEXT:    ret <4 x float> [[RES]]
410;
411  %a1b = load <4 x float>, ptr %a1
412  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1b, i32 7) ; <<4 x float>> [#uses=1]
413  ret <4 x float> %res
414}
415declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
416
417attributes #0 = { sanitize_memory }
418