; xref: /llvm-project/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll (revision fdadef9be33fb3f0844b85c35a63a8a4137382dd)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -S -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s
;
; Forked from llvm/test/CodeGen/X86/avx512-intrinsics.ll

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_compress_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK:       9:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10:[0-9]+]]
; CHECK-NEXT:    unreachable
; CHECK:       10:
; CHECK-NEXT:    [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP11]]
;
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1)
  ret <8 x double> %2
}

define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) #0 {
; CHECK-LABEL: @test_maskz_compress_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> zeroinitializer, <8 x i1> [[TMP4]])
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP9]]
;
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1)
  ret <8 x double> %2
}

define <8 x double> @test_compress_pd_512(<8 x double> %data, <8 x double> %extra_param) #0 {
; CHECK-LABEL: @test_compress_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP7]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[EXTRA_PARAM:%.*]], <8 x i1> splat (i1 true))
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP2]]
;
  %1 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> %extra_param, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x double> %1
}

define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_mask_compress_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
; CHECK:       9:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       10:
; CHECK-NEXT:    [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP11]]
;
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1)
  ret <16 x float> %2
}

define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) #0 {
; CHECK-LABEL: @test_maskz_compress_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> zeroinitializer, <16 x i1> [[TMP4]])
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP9]]
;
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1)
  ret <16 x float> %2
}

define <16 x float> @test_compress_ps_512(<16 x float> %data, <16 x float> %extra_param) #0 {
; CHECK-LABEL: @test_compress_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP7:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP7]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[EXTRA_PARAM:%.*]], <16 x i1> splat (i1 true))
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP2]]
;
  %1 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> %extra_param, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x float> %1
}

define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_compress_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
; CHECK:       9:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       10:
; CHECK-NEXT:    [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[TMP11]]
;
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1)
  ret <8 x i64> %2
}

define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) #0 {
; CHECK-LABEL: @test_maskz_compress_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> zeroinitializer, <8 x i1> [[TMP4]])
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[TMP9]]
;
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1)
  ret <8 x i64> %2
}

define <8 x i64> @test_compress_q_512(<8 x i64> %data, <8 x i64> %extra_param) #0 {
; CHECK-LABEL: @test_compress_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP7]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[EXTRA_PARAM:%.*]], <8 x i1> splat (i1 true))
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[TMP2]]
;
  %1 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> %extra_param, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i64> %1
}

define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_mask_compress_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
; CHECK:       9:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       10:
; CHECK-NEXT:    [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[TMP11]]
;
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1)
  ret <16 x i32> %2
}

define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) #0 {
; CHECK-LABEL: @test_maskz_compress_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> zeroinitializer, <16 x i1> [[TMP4]])
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[TMP9]]
;
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1)
  ret <16 x i32> %2
}

define <16 x i32> @test_compress_d_512(<16 x i32> %data, <16 x i32> %extra_param) #0 {
; CHECK-LABEL: @test_compress_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP7:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP7]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[EXTRA_PARAM:%.*]], <16 x i1> splat (i1 true))
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
;
  %1 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> %extra_param, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i32> %1
}

define <8 x double> @test_expand_pd_512(<8 x double> %data, <8 x double> %extra_param) #0 {
; CHECK-LABEL: @test_expand_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP7]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[EXTRA_PARAM:%.*]], <8 x i1> splat (i1 true))
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP2]]
;
  %1 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> %extra_param, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x double> %1
}

define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_expand_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
; CHECK:       9:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       10:
; CHECK-NEXT:    [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP11]]
;
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1)
  ret <8 x double> %2
}

define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) #0 {
; CHECK-LABEL: @test_maskz_expand_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> zeroinitializer, <8 x i1> [[TMP4]])
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP9]]
;
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1)
  ret <8 x double> %2
}

define <16 x float> @test_expand_ps_512(<16 x float> %data, <16 x float> %extra_param) #0 {
; CHECK-LABEL: @test_expand_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP7:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP7]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[EXTRA_PARAM:%.*]], <16 x i1> splat (i1 true))
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP2]]
;
  %1 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> %extra_param, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x float> %1
}

define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_mask_expand_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
; CHECK:       9:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       10:
; CHECK-NEXT:    [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP11]]
;
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1)
  ret <16 x float> %2
}

define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) #0 {
; CHECK-LABEL: @test_maskz_expand_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> zeroinitializer, <16 x i1> [[TMP4]])
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP9]]
;
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1)
  ret <16 x float> %2
}

define <8 x i64> @test_expand_q_512(<8 x i64> %data, <8 x i64> %extra_param) #0 {
; CHECK-LABEL: @test_expand_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP7]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[EXTRA_PARAM:%.*]], <8 x i1> splat (i1 true))
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[TMP2]]
;
  %1 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> %extra_param, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i64> %1
}

; Masked expand with a runtime i8 mask: besides the two vector operand shadow
; checks, the mask's shadow (bitcast i8 -> <8 x i1> -> back to i8) is also
; compared against 0 and OR'd into the same __msan_warning_noreturn guard.
define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_expand_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
; CHECK:       9:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       10:
; CHECK-NEXT:    [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[TMP11]]
;
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1)
  ret <8 x i64> %2
}
535
; Zero-masked expand: passthru is the constant zeroinitializer (no shadow to
; check), so only %data's shadow and the runtime mask's shadow are checked
; before the warning branch.
define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) #0 {
; CHECK-LABEL: @test_maskz_expand_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> zeroinitializer, <8 x i1> [[TMP4]])
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[TMP9]]
;
  %1 = bitcast i8 %mask to <8 x i1>
  %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1)
  ret <8 x i64> %2
}
561
; 32-bit-element counterpart of @test_expand_q_512: constant all-ones
; <16 x i1> mask, strict i512 shadow checks on both <16 x i32> operands,
; zero return shadow.
define <16 x i32> @test_expand_d_512(<16 x i32> %data, <16 x i32> %extra_param) #0 {
; CHECK-LABEL: @test_expand_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP7:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP7]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[EXTRA_PARAM:%.*]], <16 x i1> splat (i1 true))
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
;
  %1 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> %extra_param, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <16 x i32> %1
}
584
; 32-bit-element counterpart of @test_mask_expand_q_512: runtime i16 mask, so
; the i16 mask shadow check joins the two vector operand shadow checks in the
; OR'd guard on __msan_warning_noreturn.
define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_mask_expand_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
; CHECK:       9:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       10:
; CHECK-NEXT:    [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[TMP11]]
;
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1)
  ret <16 x i32> %2
}
614
; 32-bit-element counterpart of @test_maskz_expand_q_512: zero passthru needs
; no check, so only %data's and the i16 mask's shadows guard the warning.
define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) #0 {
; CHECK-LABEL: @test_maskz_expand_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> zeroinitializer, <16 x i1> [[TMP4]])
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[TMP9]]
;
  %1 = bitcast i16 %mask to <16 x i1>
  %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1)
  ret <16 x i32> %2
}
640
; rcp14 with constant zero passthru and all-ones immediate mask (i16 -1):
; only %a0's shadow is checked; the return shadow is stored as zero.
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) #0 {
; CHECK-LABEL: @test_rcp_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES]]
;
  %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
659declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
660
; Double-precision counterpart of @test_rcp_ps_512: single i512 shadow check
; on %a0, zero return shadow.
define <8 x double> @test_rcp_pd_512(<8 x double> %a0) #0 {
; CHECK-LABEL: @test_rcp_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1)
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES]]
;
  %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
679declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
680
681declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)
682
; rndscale.sd with constant mask (i8 -1) and immediates: the three <2 x double>
; operands' shadows (each bitcast to i128) are OR'd into a single guard; the
; constant mask and i32 immediates contribute no checks.
define <2 x double> @test_rndscale_sd(<2 x double> %a, <2 x double> %b, <2 x double> %extra_param) #0 {
; CHECK-LABEL: @test_rndscale_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[EXTRA_PARAM:%.*]], i8 -1, i32 11, i32 4)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %extra_param, i8 -1, i32 11, i32 4)
  ret <2 x double>%res
}
709
; Same as @test_rndscale_sd but with a runtime i8 mask: its shadow is compared
; against 0 directly (no bitcast needed) and OR'd into the warning guard.
define <2 x double> @test_rndscale_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) #0 {
; CHECK-LABEL: @test_rndscale_sd_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4)
  ret <2 x double>%res
}
739
; %b comes from memory: MSan first checks the pointer's shadow (guarding the
; load itself), then loads %b's shadow from shadow memory (app address XOR'd
; with 87960930222080 = 0x500000000000) and includes it in the operand checks.
define <2 x double> @test_rndscale_sd_mask_load(<2 x double> %a, ptr %bptr, <2 x double> %c, i8 %mask) #0 {
; CHECK-LABEL: @test_rndscale_sd_mask_load(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[B:%.*]] = load <2 x double>, ptr [[BPTR:%.*]], align 16
; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[BPTR]] to i64
; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP9]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP10]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP12]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK:       13:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       14:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %b = load <2 x double>, ptr %bptr
  %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4)
  ret <2 x double>%res
}
781
; Zero-masked rndscale.sd: the zeroinitializer passthru needs no check, so the
; guard combines the two vector operands' shadows with the i8 mask's shadow.
define <2 x double> @test_rndscale_sd_maskz(<2 x double> %a, <2 x double> %b, i8 %mask) #0 {
; CHECK-LABEL: @test_rndscale_sd_maskz(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 11, i32 4)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> zeroinitializer, i8 %mask, i32 11, i32 4)
  ret <2 x double>%res
}
807
808declare <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)
809
; Single-precision counterpart of @test_rndscale_sd: constant mask/immediates,
; three <4 x float> operand shadows checked via i128 compares OR'd into one guard.
define <4 x float> @test_rndscale_ss(<4 x float> %a, <4 x float> %b, <4 x float> %extra_param) #0 {
; CHECK-LABEL: @test_rndscale_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[EXTRA_PARAM:%.*]], i8 -1, i32 11, i32 4)
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> %extra_param, i8 -1, i32 11, i32 4)
  ret <4 x float>%res
}
836
; %b loaded from memory: pointer shadow is checked before the load, then %b's
; shadow is loaded from shadow memory (address XOR 87960930222080) and joins
; the other operand checks ahead of the intrinsic call.
define <4 x float> @test_rndscale_ss_load(<4 x float> %a, ptr %bptr, <4 x float> %extra_param) #0 {
; CHECK-LABEL: @test_rndscale_ss_load(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
; CHECK:       4:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       5:
; CHECK-NEXT:    [[B:%.*]] = load <4 x float>, ptr [[BPTR:%.*]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[BPTR]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]]
; CHECK-NEXT:    br i1 [[_MSOR4]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
; CHECK:       12:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       13:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B]], <4 x float> [[EXTRA_PARAM:%.*]], i8 -1, i32 11, i32 4)
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %b = load <4 x float>, ptr %bptr
  %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> %extra_param, i8 -1, i32 11, i32 4)
  ret <4 x float>%res
}
875
; Single-precision counterpart of @test_rndscale_sd_mask: three vector operand
; shadow checks plus the runtime i8 mask's shadow check in one OR'd guard.
define <4 x float> @test_rndscale_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) #0 {
; CHECK-LABEL: @test_rndscale_ss_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4)
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 11, i32 4)
  ret <4 x float>%res
}
905
; Zero-masked single-precision variant: zero passthru is exempt from checking;
; %a's, %b's, and the mask's shadows guard the warning branch.
define <4 x float> @test_rndscale_ss_maskz(<4 x float> %a, <4 x float> %b, i8 %mask) #0 {
; CHECK-LABEL: @test_rndscale_ss_maskz(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 11, i32 4)
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask, i32 11, i32 4)
  ret <4 x float>%res
}
931
932declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
933
; rndscale.pd.512 with %a used as both input and passthru: %a's shadow is
; bitcast and compared twice (once per operand position) and the two compares
; are OR'd — the instrumentation checks each operand independently.
define <8 x double> @test7(<8 x double> %a) #0 {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK:       4:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       5:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> [[A:%.*]], i32 11, <8 x double> [[A]], i8 -1, i32 4)
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES]]
;
  %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
  ret <8 x double>%res
}
955
956declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
957
; Single-precision counterpart of @test7: rndscale.ps.512 with %a as both input
; and passthru — its shadow is checked once per operand position.
define <16 x float> @test8(<16 x float> %a) #0 {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK:       4:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       5:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> [[A:%.*]], i32 11, <16 x float> [[A]], i16 -1, i32 4)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES]]
;
  %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
  ret <16 x float>%res
}
979
; rsqrt14 with constant zero passthru and all-ones mask: same strict single-
; operand shadow check pattern as @test_rcp_ps_512, zero return shadow.
define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) #0 {
; CHECK-LABEL: @test_rsqrt_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES]]
;
  %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
998declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
999
; Generic llvm.sqrt: no strict check — the argument shadow is propagated
; unchanged into the return-value shadow slot.
define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) #0 {
; CHECK-LABEL: @test_sqrt_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]])
; CHECK-NEXT:    store <8 x i64> [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP2]]
;
  %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
  ret <8 x double> %1
}
1011
; Masked sqrt via select: the select's shadow blends the operand shadows under
; the concrete mask, and positions with an uninitialized mask bit (xor/or
; chain) become poisoned in [[_MSPROP_SELECT]].
define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_sqrt_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP1]], <8 x i64> [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x double> [[TMP4]] to <8 x i64>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x double> [[PASSTHRU:%.*]] to <8 x i64>
; CHECK-NEXT:    [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP1]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]]
; CHECK-NEXT:    [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> [[PASSTHRU]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP13]]
;
  %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
  ret <8 x double> %3
}
1037
; Zero-masked variant: same select shadow propagation as the masked case, with
; the passthru operand (and its shadow) replaced by zeroinitializer.
define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) #0 {
; CHECK-LABEL: @test_maskz_sqrt_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP1]], <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x double> [[TMP3]] to <8 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = xor <8 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP1]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP10]], <8 x i64> [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = select <8 x i1> [[TMP5]], <8 x double> [[TMP3]], <8 x double> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP11]]
;
  %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
  ret <8 x double> %3
}
declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
1062
; x86-specific sqrt with rounding operand: unlike generic llvm.sqrt above, this
; intrinsic is handled strictly (report on any uninitialized input bit), and
; the return shadow is clean.
define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) #0 {
; CHECK-LABEL: @test_sqrt_round_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11)
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP5]]
;
  %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
  ret <8 x double> %1
}
1081
; Strict check on %a0's shadow before the call; the intrinsic's result shadow
; is then zero, so the select propagation below mixes only the passthru and
; mask shadows.
define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_sqrt_round_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> zeroinitializer, <8 x i64> [[TMP3]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x double> [[TMP7]] to <8 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x double> [[PASSTHRU:%.*]] to <8 x i64>
; CHECK-NEXT:    [[TMP13:%.*]] = xor <8 x i64> [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP15]], <8 x i64> [[TMP10]]
; CHECK-NEXT:    [[TMP16:%.*]] = select <8 x i1> [[TMP9]], <8 x double> [[TMP7]], <8 x double> [[PASSTHRU]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP16]]
;
  %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
  ret <8 x double> %3
}
1114
; Zero-masked + rounding: after the strict check, both select arms carry a
; zero shadow, so poisoning can only come from an uninitialized mask bit.
define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) #0 {
; CHECK-LABEL: @test_maskz_sqrt_round_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK:       4:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       5:
; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11)
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x double> [[TMP6]] to <8 x i64>
; CHECK-NEXT:    [[TMP11:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP13]], <8 x i64> [[TMP9]]
; CHECK-NEXT:    [[TMP14:%.*]] = select <8 x i1> [[TMP8]], <8 x double> [[TMP6]], <8 x double> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[TMP14]]
;
  %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
  %2 = bitcast i8 %mask to <8 x i1>
  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
  ret <8 x double> %3
}
declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, i32) nounwind readnone
1146
; Single-precision counterpart of test_sqrt_pd_512: shadow is forwarded, no
; strict check for generic llvm.sqrt.
define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) #0 {
; CHECK-LABEL: @test_sqrt_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]])
; CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP2]]
;
  %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
  ret <16 x float> %1
}
1158
; Single-precision counterpart of test_mask_sqrt_pd_512 (i16 mask, <16 x i32>
; shadow): select-based shadow blend plus mask-shadow poisoning.
define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_mask_sqrt_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x float> [[TMP4]] to <16 x i32>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP10:%.*]] = xor <16 x i32> [[TMP8]], [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP1]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP12]], <16 x i32> [[TMP7]]
; CHECK-NEXT:    [[TMP13:%.*]] = select <16 x i1> [[TMP6]], <16 x float> [[TMP4]], <16 x float> [[PASSTHRU]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP13]]
;
  %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
  ret <16 x float> %3
}
1184
; Single-precision zero-masked variant: passthru shadow is zeroinitializer.
define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) #0 {
; CHECK-LABEL: @test_maskz_sqrt_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]])
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x float> [[TMP3]] to <16 x i32>
; CHECK-NEXT:    [[TMP8:%.*]] = xor <16 x i32> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP1]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP10]], <16 x i32> [[TMP6]]
; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[TMP3]], <16 x float> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP11]]
;
  %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}
declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
1209
; Single-precision counterpart of test_sqrt_round_pd_512: strict shadow check,
; clean return shadow.
define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) #0 {
; CHECK-LABEL: @test_sqrt_round_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP5]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
  ret <16 x float> %1
}
1228
; Single-precision counterpart of test_mask_sqrt_round_pd_512: strict check on
; %a0, then select propagation with a zero result shadow.
define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_mask_sqrt_round_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> zeroinitializer, <16 x i32> [[TMP3]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <16 x float> [[TMP7]] to <16 x i32>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP15]], <16 x i32> [[TMP10]]
; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP9]], <16 x float> [[TMP7]], <16 x float> [[PASSTHRU]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP16]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
  ret <16 x float> %3
}
1261
; Single-precision counterpart of test_maskz_sqrt_round_pd_512: both select
; arms carry a zero shadow after the strict check.
define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) #0 {
; CHECK-LABEL: @test_maskz_sqrt_round_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK:       4:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       5:
; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11)
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <16 x float> [[TMP6]] to <16 x i32>
; CHECK-NEXT:    [[TMP11:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = or <16 x i32> [[TMP12]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP13]], <16 x i32> [[TMP9]]
; CHECK-NEXT:    [[TMP14:%.*]] = select <16 x i1> [[TMP8]], <16 x float> [[TMP6]], <16 x float> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP14]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}
declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, i32) nounwind readnone
1293
; getexp with default rounding (i32 4 = CUR_DIRECTION): strict handling —
; report on any uninitialized bit of %a0, clean return shadow.
define <8 x double> @test_getexp_pd_512(<8 x double> %a0) #0 {
; CHECK-LABEL: @test_getexp_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1, i32 4)
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES]]
;
  %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0,  <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
; Same as test_getexp_pd_512 but with an explicit rounding-mode operand
; (i32 12); the generated instrumentation is identical.
define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) #0 {
; CHECK-LABEL: @test_getexp_round_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1, i32 12)
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES]]
;
  %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0,  <8 x double> zeroinitializer, i8 -1, i32 12)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
1331
; Single-precision getexp, default rounding: strict check, clean return shadow.
define <16 x float> @test_getexp_ps_512(<16 x float> %a0) #0 {
; CHECK-LABEL: @test_getexp_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1, i32 4)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES]]
;
  %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
1350
; Single-precision getexp with explicit rounding (i32 8): instrumentation is
; the same as the default-rounding case above.
define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) #0 {
; CHECK-LABEL: @test_getexp_round_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1, i32 8)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES]]
;
  %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone

declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
1372
; Scalar sqrt.ss, four call variants: each call gets its own strict check over
; exactly the non-constant operands it passes (constant a2/mask arguments have
; a known-zero shadow, so fewer _MSCMP terms are emitted for res2/res3).
define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
; CHECK-LABEL: @test_sqrt_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES0:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
; CHECK-NEXT:    br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK:       13:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       14:
; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]], i8 [[MASK]], i32 9)
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP13:%.*]] = icmp ne i128 [[TMP15]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSCMP13]], [[_MSCMP14]]
; CHECK-NEXT:    [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
; CHECK-NEXT:    br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
; CHECK:       17:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       18:
; CHECK-NEXT:    [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> zeroinitializer, i8 [[MASK]], i32 10)
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0
; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0
; CHECK-NEXT:    [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]]
; CHECK-NEXT:    br i1 [[_MSOR20]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]]
; CHECK:       21:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       22:
; CHECK-NEXT:    [[RES3:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> zeroinitializer, i8 -1, i32 11)
; CHECK-NEXT:    [[RES_1:%.*]] = fadd <4 x float> [[RES0]], [[RES1]]
; CHECK-NEXT:    [[RES_2:%.*]] = fadd <4 x float> [[RES2]], [[RES3]]
; CHECK-NEXT:    [[RES:%.*]] = fadd <4 x float> [[RES_1]], [[RES_2]]
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 10)
  %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 11)

  %res.1 = fadd <4 x float> %res0, %res1
  %res.2 = fadd <4 x float> %res2, %res3
  %res   = fadd <4 x float> %res.1, %res.2
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
1454
1455define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
1456; CHECK-LABEL: @test_sqrt_sd(
1457; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1458; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1459; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1460; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
1461; CHECK-NEXT:    call void @llvm.donothing()
1462; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1463; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1464; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1465; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
1466; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1467; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1468; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
1469; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1470; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
1471; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1472; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1473; CHECK:       8:
1474; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1475; CHECK-NEXT:    unreachable
1476; CHECK:       9:
1477; CHECK-NEXT:    [[RES0:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
1478; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1479; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
1480; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1481; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
1482; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
1483; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1484; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
1485; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
1486; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
1487; CHECK-NEXT:    [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
1488; CHECK-NEXT:    br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
1489; CHECK:       13:
1490; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1491; CHECK-NEXT:    unreachable
1492; CHECK:       14:
1493; CHECK-NEXT:    [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]], i8 [[MASK]], i32 9)
1494; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1495; CHECK-NEXT:    [[_MSCMP13:%.*]] = icmp ne i128 [[TMP15]], 0
1496; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1497; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
1498; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSCMP13]], [[_MSCMP14]]
1499; CHECK-NEXT:    [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
1500; CHECK-NEXT:    [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
1501; CHECK-NEXT:    br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
1502; CHECK:       17:
1503; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1504; CHECK-NEXT:    unreachable
1505; CHECK:       18:
1506; CHECK-NEXT:    [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> zeroinitializer, i8 [[MASK]], i32 10)
1507; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1508; CHECK-NEXT:    [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0
1509; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1510; CHECK-NEXT:    [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0
1511; CHECK-NEXT:    [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]]
1512; CHECK-NEXT:    br i1 [[_MSOR20]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]]
1513; CHECK:       21:
1514; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1515; CHECK-NEXT:    unreachable
1516; CHECK:       22:
1517; CHECK-NEXT:    [[RES3:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> zeroinitializer, i8 -1, i32 11)
1518; CHECK-NEXT:    [[RES_1:%.*]] = fadd <2 x double> [[RES0]], [[RES1]]
1519; CHECK-NEXT:    [[RES_2:%.*]] = fadd <2 x double> [[RES2]], [[RES3]]
1520; CHECK-NEXT:    [[RES:%.*]] = fadd <2 x double> [[RES_1]], [[RES_2]]
1521; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1522; CHECK-NEXT:    ret <2 x double> [[RES]]
1523;
1524  %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
1525  %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9)
1526  %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 10)
1527  %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 11)
1528
1529  %res.1 = fadd <2 x double> %res0, %res1
1530  %res.2 = fadd <2 x double> %res2, %res3
1531  %res   = fadd <2 x double> %res.1, %res.2
1532  ret <2 x double> %res
1533}
1534
; MSan instrumentation of @llvm.x86.avx512.cvttsd2usi: only element 0 of the
; <2 x i64> operand shadow is checked (extractelement ... i32 0) before each of
; the two calls, and the i32 return shadow is cleared to 0. Contrast with
; test_x86_avx512_cvttsd2si below, where the whole 128-bit shadow is checked.
1535define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) #0 {
1536; CHECK-LABEL: @test_x86_avx512_cvttsd2usi(
1537; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1538; CHECK-NEXT:    call void @llvm.donothing()
1539; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
1540; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
1541; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1542; CHECK:       3:
1543; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1544; CHECK-NEXT:    unreachable
1545; CHECK:       4:
1546; CHECK-NEXT:    [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[A0:%.*]], i32 4)
1547; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
1548; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP5]], 0
1549; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1550; CHECK:       6:
1551; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1552; CHECK-NEXT:    unreachable
1553; CHECK:       7:
1554; CHECK-NEXT:    [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[A0]], i32 8)
1555; CHECK-NEXT:    [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
1556; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
1557; CHECK-NEXT:    ret i32 [[RES2]]
1558;
1559  %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
1560  %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
1561  %res2 = add i32 %res0, %res1
1562  ret i32 %res2
1563}
1564declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
1565
; MSan instrumentation of @llvm.x86.avx512.cvttsd2si: here the entire <2 x i64>
; operand shadow is bitcast to i128 and compared against 0 before each call
; (default strict handling), unlike the element-0-only check for cvttsd2usi
; above. Return shadow is cleared to 0.
1566define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) #0 {
1567; CHECK-LABEL: @test_x86_avx512_cvttsd2si(
1568; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1569; CHECK-NEXT:    call void @llvm.donothing()
1570; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1571; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
1572; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1573; CHECK:       3:
1574; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1575; CHECK-NEXT:    unreachable
1576; CHECK:       4:
1577; CHECK-NEXT:    [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[A0:%.*]], i32 4)
1578; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1579; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
1580; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1581; CHECK:       6:
1582; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1583; CHECK-NEXT:    unreachable
1584; CHECK:       7:
1585; CHECK-NEXT:    [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[A0]], i32 8)
1586; CHECK-NEXT:    [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
1587; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
1588; CHECK-NEXT:    ret i32 [[RES2]]
1589;
1590  %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
1591  %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
1592  %res2 = add i32 %res0, %res1
1593  ret i32 %res2
1594}
1595declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
1596
; MSan instrumentation of @llvm.x86.avx512.cvttss2si: the full <4 x i32>
; operand shadow is checked (bitcast to i128) before each call; return shadow
; is cleared to 0.
1597define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) #0 {
1598; CHECK-LABEL: @test_x86_avx512_cvttss2si(
1599; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1600; CHECK-NEXT:    call void @llvm.donothing()
1601; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1602; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
1603; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1604; CHECK:       3:
1605; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1606; CHECK-NEXT:    unreachable
1607; CHECK:       4:
1608; CHECK-NEXT:    [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A0:%.*]], i32 8)
1609; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1610; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
1611; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1612; CHECK:       6:
1613; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1614; CHECK-NEXT:    unreachable
1615; CHECK:       7:
1616; CHECK-NEXT:    [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A0]], i32 4)
1617; CHECK-NEXT:    [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
1618; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
1619; CHECK-NEXT:    ret i32 [[RES2]]
1620;
1621  %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
1622  %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
1623  %res2 = add i32 %res0, %res1
1624  ret i32 %res2
1625}
1626declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
1627
; Same intrinsic as above but with the operand loaded from memory: MSan first
; checks the pointer argument's shadow, then computes the shadow of the loaded
; vector from shadow memory (address xor 87960930222080, the x86_64 Linux
; shadow mapping) and checks it before the call.
1628define i32 @test_x86_avx512_cvttss2si_load(ptr %a0) #0 {
1629; CHECK-LABEL: @test_x86_avx512_cvttss2si_load(
1630; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1631; CHECK-NEXT:    call void @llvm.donothing()
1632; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1633; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
1634; CHECK:       2:
1635; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1636; CHECK-NEXT:    unreachable
1637; CHECK:       3:
1638; CHECK-NEXT:    [[A1:%.*]] = load <4 x float>, ptr [[A0:%.*]], align 16
1639; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A0]] to i64
1640; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
1641; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
1642; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
1643; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
1644; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
1645; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1646; CHECK:       8:
1647; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1648; CHECK-NEXT:    unreachable
1649; CHECK:       9:
1650; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A1]], i32 4)
1651; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
1652; CHECK-NEXT:    ret i32 [[RES]]
1653;
1654  %a1 = load <4 x float>, ptr %a0
1655  %res = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a1, i32 4) ;
1656  ret i32 %res
1657}
1658
; MSan instrumentation of @llvm.x86.avx512.cvttss2usi: like cvttsd2usi, only
; element 0 of the <4 x i32> operand shadow is checked before each call.
1659define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) #0 {
1660; CHECK-LABEL: @test_x86_avx512_cvttss2usi(
1661; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1662; CHECK-NEXT:    call void @llvm.donothing()
1663; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
1664; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0
1665; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1666; CHECK:       3:
1667; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1668; CHECK-NEXT:    unreachable
1669; CHECK:       4:
1670; CHECK-NEXT:    [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[A0:%.*]], i32 8)
1671; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
1672; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP5]], 0
1673; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1674; CHECK:       6:
1675; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1676; CHECK-NEXT:    unreachable
1677; CHECK:       7:
1678; CHECK-NEXT:    [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[A0]], i32 4)
1679; CHECK-NEXT:    [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
1680; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
1681; CHECK-NEXT:    ret i32 [[RES2]]
1682;
1683  %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
1684  %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
1685  %res2 = add i32 %res0, %res1
1686  ret i32 %res2
1687}
1688declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
1689
; MSan instrumentation of @llvm.x86.avx512.vcvtsd2usi32 across three rounding
; modes (4, 11, 9): element 0 of the operand shadow is re-checked before each
; call and a warning branch is emitted each time; return shadow cleared.
1690define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) #0 {
1691; CHECK-LABEL: @test_x86_avx512_cvtsd2usi32(
1692; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1693; CHECK-NEXT:    call void @llvm.donothing()
1694; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
1695; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
1696; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1697; CHECK:       3:
1698; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1699; CHECK-NEXT:    unreachable
1700; CHECK:       4:
1701; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0:%.*]], i32 4)
1702; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
1703; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP5]], 0
1704; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1705; CHECK:       6:
1706; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1707; CHECK-NEXT:    unreachable
1708; CHECK:       7:
1709; CHECK-NEXT:    [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0]], i32 11)
1710; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
1711; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0
1712; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
1713; CHECK:       9:
1714; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1715; CHECK-NEXT:    unreachable
1716; CHECK:       10:
1717; CHECK-NEXT:    [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0]], i32 9)
1718; CHECK-NEXT:    [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
1719; CHECK-NEXT:    [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
1720; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
1721; CHECK-NEXT:    ret i32 [[RES4]]
1722;
1723  %res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4)
1724  %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 11)
1725  %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 9)
1726  %res3 = add i32 %res, %res1
1727  %res4 = add i32 %res3, %res2
1728  ret i32 %res4
1729}
1730declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone
1731
; MSan instrumentation of @llvm.x86.avx512.vcvtsd2si32 across three rounding
; modes: the full 128-bit operand shadow is checked before each call (default
; strict handling), unlike the element-0-only check for vcvtsd2usi32 above.
1732define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) #0 {
1733; CHECK-LABEL: @test_x86_avx512_cvtsd2si32(
1734; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1735; CHECK-NEXT:    call void @llvm.donothing()
1736; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1737; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
1738; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1739; CHECK:       3:
1740; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1741; CHECK-NEXT:    unreachable
1742; CHECK:       4:
1743; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0:%.*]], i32 4)
1744; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1745; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
1746; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1747; CHECK:       6:
1748; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1749; CHECK-NEXT:    unreachable
1750; CHECK:       7:
1751; CHECK-NEXT:    [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0]], i32 11)
1752; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1753; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
1754; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
1755; CHECK:       9:
1756; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1757; CHECK-NEXT:    unreachable
1758; CHECK:       10:
1759; CHECK-NEXT:    [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0]], i32 9)
1760; CHECK-NEXT:    [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
1761; CHECK-NEXT:    [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
1762; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
1763; CHECK-NEXT:    ret i32 [[RES4]]
1764;
1765  %res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4)
1766  %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 11)
1767  %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 9)
1768  %res3 = add i32 %res, %res1
1769  %res4 = add i32 %res3, %res2
1770  ret i32 %res4
1771}
1772declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone
1773
; MSan instrumentation of @llvm.x86.avx512.vcvtss2usi32 across three rounding
; modes: only element 0 of the <4 x i32> operand shadow is checked per call.
1774define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) #0 {
1775; CHECK-LABEL: @test_x86_avx512_cvtss2usi32(
1776; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1777; CHECK-NEXT:    call void @llvm.donothing()
1778; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
1779; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0
1780; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1781; CHECK:       3:
1782; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1783; CHECK-NEXT:    unreachable
1784; CHECK:       4:
1785; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0:%.*]], i32 4)
1786; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
1787; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP5]], 0
1788; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1789; CHECK:       6:
1790; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1791; CHECK-NEXT:    unreachable
1792; CHECK:       7:
1793; CHECK-NEXT:    [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0]], i32 11)
1794; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
1795; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i32 [[TMP8]], 0
1796; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
1797; CHECK:       9:
1798; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1799; CHECK-NEXT:    unreachable
1800; CHECK:       10:
1801; CHECK-NEXT:    [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0]], i32 9)
1802; CHECK-NEXT:    [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
1803; CHECK-NEXT:    [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
1804; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
1805; CHECK-NEXT:    ret i32 [[RES4]]
1806;
1807  %res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4)
1808  %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 11)
1809  %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 9)
1810  %res3 = add i32 %res, %res1
1811  %res4 = add i32 %res3, %res2
1812  ret i32 %res4
1813}
1814declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone
1815
; MSan instrumentation of @llvm.x86.avx512.vcvtss2si32 across three rounding
; modes: the full 128-bit operand shadow is checked before each call.
1816define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) #0 {
1817; CHECK-LABEL: @test_x86_avx512_cvtss2si32(
1818; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1819; CHECK-NEXT:    call void @llvm.donothing()
1820; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1821; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
1822; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1823; CHECK:       3:
1824; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1825; CHECK-NEXT:    unreachable
1826; CHECK:       4:
1827; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0:%.*]], i32 4)
1828; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1829; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
1830; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1831; CHECK:       6:
1832; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1833; CHECK-NEXT:    unreachable
1834; CHECK:       7:
1835; CHECK-NEXT:    [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0]], i32 11)
1836; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1837; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
1838; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
1839; CHECK:       9:
1840; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1841; CHECK-NEXT:    unreachable
1842; CHECK:       10:
1843; CHECK-NEXT:    [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0]], i32 9)
1844; CHECK-NEXT:    [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
1845; CHECK-NEXT:    [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
1846; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
1847; CHECK-NEXT:    ret i32 [[RES4]]
1848;
1849  %res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4)
1850  %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 11)
1851  %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 9)
1852  %res3 = add i32 %res, %res1
1853  %res4 = add i32 %res3, %res2
1854  ret i32 %res4
1855}
1856declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone
1857
; MSan instrumentation of @llvm.x86.avx512.mask.vcvtps2ph.512 with three mask
; variants plus a store of %res1 to %dst: each call checks the shadows of the
; operands it uses (input vector; plus mask; plus passthrough %src), and the
; store path checks the %dst pointer shadow and zeroes the stored value's
; shadow in shadow memory.
; NOTE(review): the function name says _256 but it exercises the 512-bit
; intrinsic on <16 x float> — presumably inherited from the forked CodeGen
; test; confirm against the original before renaming.
1858define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 %mask, ptr %dst) #0 {
1859; CHECK-LABEL: @test_x86_vcvtps2ph_256(
1860; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
1861; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
1862; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
1863; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
1864; CHECK-NEXT:    call void @llvm.donothing()
1865; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
1866; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
1867; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1868; CHECK:       6:
1869; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1870; CHECK-NEXT:    unreachable
1871; CHECK:       7:
1872; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0:%.*]], i32 2, <16 x i16> zeroinitializer, i16 -1)
1873; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
1874; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
1875; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP2]], 0
1876; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
1877; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
1878; CHECK:       9:
1879; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1880; CHECK-NEXT:    unreachable
1881; CHECK:       10:
1882; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK:%.*]])
1883; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
1884; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
1885; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x i16> [[TMP3]] to i256
1886; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i256 [[TMP12]], 0
1887; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
1888; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
1889; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
1890; CHECK-NEXT:    br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
1891; CHECK:       13:
1892; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1893; CHECK-NEXT:    unreachable
1894; CHECK:       14:
1895; CHECK-NEXT:    [[RES3:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 12, <16 x i16> [[SRC:%.*]], i16 [[MASK]])
1896; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i64 [[TMP4]], 0
1897; CHECK-NEXT:    br i1 [[_MSCMP8]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
1898; CHECK:       15:
1899; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1900; CHECK-NEXT:    unreachable
1901; CHECK:       16:
1902; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
1903; CHECK-NEXT:    [[TMP18:%.*]] = xor i64 [[TMP17]], 87960930222080
1904; CHECK-NEXT:    [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
1905; CHECK-NEXT:    store <16 x i16> zeroinitializer, ptr [[TMP19]], align 32
1906; CHECK-NEXT:    store <16 x i16> [[RES1]], ptr [[DST]], align 32
1907; CHECK-NEXT:    [[RES:%.*]] = add <16 x i16> [[RES2]], [[RES3]]
1908; CHECK-NEXT:    store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
1909; CHECK-NEXT:    ret <16 x i16> [[RES]]
1910;
1911  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
1912  %res2 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 11, <16 x i16> zeroinitializer, i16 %mask)
1913  %res3 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 12, <16 x i16> %src, i16 %mask)
1914  store <16 x i16> %res1, ptr %dst
1915  %res  = add <16 x i16> %res2, %res3
1916  ret <16 x i16> %res
1917}
1918
1919declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
1920
; MSan instrumentation of @llvm.x86.avx512.mask.cmp.ps.512 (all-ones mask,
; SAE rounding 8): both 512-bit operand shadows are checked, warning on any
; uninitialized bit; the i16 result shadow is stored as fully initialized (0).
1921define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) #0 {
1922; CHECK-LABEL: @test_cmpps(
1923; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
1924; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
1925; CHECK-NEXT:    call void @llvm.donothing()
1926; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
1927; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
1928; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
1929; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
1930; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1931; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
1932; CHECK:       5:
1933; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1934; CHECK-NEXT:    unreachable
1935; CHECK:       6:
1936; CHECK-NEXT:    [[RES:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 2, <16 x i1> splat (i1 true), i32 8)
1937; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i1> [[RES]] to i16
1938; CHECK-NEXT:    store i16 0, ptr @__msan_retval_tls, align 8
1939; CHECK-NEXT:    ret i16 [[TMP7]]
1940;
1941  %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
1942  %1 = bitcast <16 x i1> %res to i16
1943  ret i16 %1
1944}
1945declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32)
1946
; MSan instrumentation of @llvm.x86.avx512.mask.cmp.pd.512 (all-ones mask,
; current-exception rounding 4): mirrors test_cmpps — both operand shadows are
; checked and the i8 result shadow is stored as 0.
1947define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) #0 {
1948; CHECK-LABEL: @test_cmppd(
1949; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
1950; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
1951; CHECK-NEXT:    call void @llvm.donothing()
1952; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
1953; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
1954; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
1955; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
1956; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1957; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
1958; CHECK:       5:
1959; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
1960; CHECK-NEXT:    unreachable
1961; CHECK:       6:
1962; CHECK-NEXT:    [[RES:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 4, <8 x i1> splat (i1 true), i32 4)
1963; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i1> [[RES]] to i8
1964; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
1965; CHECK-NEXT:    ret i8 [[TMP7]]
1966;
1967  %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
1968  %1 = bitcast <8 x i1> %res to i8
1969  ret i8 %1
1970}
1971declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32)
1972
1973
1974  ; fp min - max
; MSan instrumentation of @llvm.x86.avx512.max.pd.512: shadow is propagated
; (OR of the two operand shadows) rather than checked — no warning branch is
; emitted for this intrinsic.
1975define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) #0 {
1976; CHECK-LABEL: @test_vmaxpd(
1977; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
1978; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
1979; CHECK-NEXT:    call void @llvm.donothing()
1980; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
1981; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], zeroinitializer
1982; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 4)
1983; CHECK-NEXT:    store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
1984; CHECK-NEXT:    ret <8 x double> [[TMP7]]
1985;
1986  %1 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
1987  ret <8 x double> %1
1988}
1989declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32)
1990
; MSan instrumentation of @llvm.x86.avx512.min.pd.512: same shadow-propagation
; pattern as test_vmaxpd above (OR of operand shadows, no warning branch).
1991define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) #0 {
1992; CHECK-LABEL: @test_vminpd(
1993; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
1994; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
1995; CHECK-NEXT:    call void @llvm.donothing()
1996; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
1997; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], zeroinitializer
1998; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 4)
1999; CHECK-NEXT:    store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
2000; CHECK-NEXT:    ret <8 x double> [[TMP7]]
2001;
2002  %1 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
2003  ret <8 x double> %1
2004}
2005declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32)
2006
; MSan instrumentation of a masked single-lane store (llvm.masked.store via
; mask bit 0): the shadow of `and i8 %mask, 1` is approximated from the mask
; shadow (and/or bit arithmetic), the data shadow is written into shadow
; memory through a parallel llvm.masked.store with the same mask, and a
; warning fires if the pointer shadow or the extracted mask-lane shadow is
; non-zero.
2007define void @test_mask_store_ss(ptr %ptr, <4 x float> %data, i8 %mask) #0 {
2008; CHECK-LABEL: @test_mask_store_ss(
2009; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
2010; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2011; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8
2012; CHECK-NEXT:    call void @llvm.donothing()
2013; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP1]], 0
2014; CHECK-NEXT:    [[TMP5:%.*]] = and i8 [[MASK:%.*]], 0
2015; CHECK-NEXT:    [[TMP6:%.*]] = and i8 [[TMP1]], 1
2016; CHECK-NEXT:    [[TMP7:%.*]] = or i8 [[TMP4]], [[TMP5]]
2017; CHECK-NEXT:    [[TMP8:%.*]] = or i8 [[TMP7]], [[TMP6]]
2018; CHECK-NEXT:    [[TMP9:%.*]] = and i8 [[MASK]], 1
2019; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[TMP8]] to <8 x i1>
2020; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8 [[TMP9]] to <8 x i1>
2021; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i1> [[TMP10]], <8 x i1> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2022; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2023; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64
2024; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080
2025; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
2026; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[TMP14]], i32 1, <4 x i1> [[EXTRACT]])
2027; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
2028; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <4 x i1> [[_MSPROP]] to i4
2029; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i4 [[TMP15]], 0
2030; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2031; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]]
2032; CHECK:       16:
2033; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2034; CHECK-NEXT:    unreachable
2035; CHECK:       17:
2036; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0(<4 x float> [[DATA:%.*]], ptr [[PTR]], i32 1, <4 x i1> [[EXTRACT]])
2037; CHECK-NEXT:    ret void
2038;
2039  %1 = and i8 %mask, 1
2040  %2 = bitcast i8 %1 to <8 x i1>
2041  %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2042  call void @llvm.masked.store.v4f32.p0(<4 x float> %data, ptr %ptr, i32 1, <4 x i1> %extract)
2043  ret void
2044}
2045declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) #1
2046
2047
2048declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)
2049declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)
2050declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32)
2051
; MSan lowering of @llvm.x86.avx512.sub.ps.512 with rounding immediate 8 (rn):
; both operand shadows are strictly checked (OR of "any bit set" comparisons,
; warn-and-abort on failure) and a clean (all-zero) return shadow is stored.
2052define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) #0 {
2053; CHECK-LABEL: @test_vsubps_rn(
2054; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2055; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2056; CHECK-NEXT:    call void @llvm.donothing()
2057; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2058; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
2059; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2060; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
2061; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2062; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
2063; CHECK:       5:
2064; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2065; CHECK-NEXT:    unreachable
2066; CHECK:       6:
2067; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
2068; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
2069; CHECK-NEXT:    ret <16 x float> [[TMP7]]
2070;
2071  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
2072  ret <16 x float> %1
2073}
2074
; Same as test_vsubps_rn but with rounding immediate 9 (rd): strict shadow
; check of both operands, then a clean (zero) return shadow.
2075define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) #0 {
2076; CHECK-LABEL: @test_vsubps_rd(
2077; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2078; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2079; CHECK-NEXT:    call void @llvm.donothing()
2080; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2081; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
2082; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2083; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
2084; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2085; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
2086; CHECK:       5:
2087; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2088; CHECK-NEXT:    unreachable
2089; CHECK:       6:
2090; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
2091; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
2092; CHECK-NEXT:    ret <16 x float> [[TMP7]]
2093;
2094  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
2095  ret <16 x float> %1
2096}
2097
; Same as test_vsubps_rn but with rounding immediate 10 (ru): strict shadow
; check of both operands, then a clean (zero) return shadow.
2098define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) #0 {
2099; CHECK-LABEL: @test_vsubps_ru(
2100; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2101; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2102; CHECK-NEXT:    call void @llvm.donothing()
2103; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2104; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
2105; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2106; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
2107; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2108; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
2109; CHECK:       5:
2110; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2111; CHECK-NEXT:    unreachable
2112; CHECK:       6:
2113; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
2114; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
2115; CHECK-NEXT:    ret <16 x float> [[TMP7]]
2116;
2117  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
2118  ret <16 x float> %1
2119}
2120
; Same as test_vsubps_rn but with rounding immediate 11 (rz): strict shadow
; check of both operands, then a clean (zero) return shadow.
2121define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) #0 {
2122; CHECK-LABEL: @test_vsubps_rz(
2123; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2124; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2125; CHECK-NEXT:    call void @llvm.donothing()
2126; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2127; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
2128; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2129; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
2130; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2131; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
2132; CHECK:       5:
2133; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2134; CHECK-NEXT:    unreachable
2135; CHECK:       6:
2136; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
2137; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
2138; CHECK-NEXT:    ret <16 x float> [[TMP7]]
2139;
2140  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
2141  ret <16 x float> %1
2142}
2143
; MSan lowering of @llvm.x86.avx512.mul.ps.512 with rounding immediate 8 (rn):
; strict shadow check of both operands, then a clean (zero) return shadow.
2144define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) #0 {
2145; CHECK-LABEL: @test_vmulps_rn(
2146; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2147; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2148; CHECK-NEXT:    call void @llvm.donothing()
2149; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2150; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
2151; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2152; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
2153; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2154; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
2155; CHECK:       5:
2156; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2157; CHECK-NEXT:    unreachable
2158; CHECK:       6:
2159; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
2160; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
2161; CHECK-NEXT:    ret <16 x float> [[TMP7]]
2162;
2163  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
2164  ret <16 x float> %1
2165}
2166
; Same as test_vmulps_rn but with rounding immediate 9 (rd).
2167define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) #0 {
2168; CHECK-LABEL: @test_vmulps_rd(
2169; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2170; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2171; CHECK-NEXT:    call void @llvm.donothing()
2172; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2173; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
2174; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2175; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
2176; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2177; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
2178; CHECK:       5:
2179; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2180; CHECK-NEXT:    unreachable
2181; CHECK:       6:
2182; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
2183; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
2184; CHECK-NEXT:    ret <16 x float> [[TMP7]]
2185;
2186  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
2187  ret <16 x float> %1
2188}
2189
; Same as test_vmulps_rn but with rounding immediate 10 (ru).
2190define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) #0 {
2191; CHECK-LABEL: @test_vmulps_ru(
2192; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2193; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2194; CHECK-NEXT:    call void @llvm.donothing()
2195; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2196; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
2197; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2198; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
2199; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2200; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
2201; CHECK:       5:
2202; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2203; CHECK-NEXT:    unreachable
2204; CHECK:       6:
2205; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
2206; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
2207; CHECK-NEXT:    ret <16 x float> [[TMP7]]
2208;
2209  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
2210  ret <16 x float> %1
2211}
2212
; Same as test_vmulps_rn but with rounding immediate 11 (rz).
2213define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) #0 {
2214; CHECK-LABEL: @test_vmulps_rz(
2215; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2216; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2217; CHECK-NEXT:    call void @llvm.donothing()
2218; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2219; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
2220; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2221; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
2222; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2223; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
2224; CHECK:       5:
2225; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2226; CHECK-NEXT:    unreachable
2227; CHECK:       6:
2228; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
2229; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
2230; CHECK-NEXT:    ret <16 x float> [[TMP7]]
2231;
2232  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
2233  ret <16 x float> %1
2234}
2235
; Masked (zeroing) variant, rounding immediate 8 (rn): operand shadows are
; strictly checked; the final select's shadow ([[_MSPROP_SELECT]]) blends the
; result shadow with the zero passthru shadow using the shadow of %mask, so
; uncertain mask bits poison the corresponding lanes of the return shadow.
2236define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
2237; CHECK-LABEL: @test_vmulps_mask_rn(
2238; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2239; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2240; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2241; CHECK-NEXT:    call void @llvm.donothing()
2242; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2243; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2244; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2245; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2246; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2247; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2248; CHECK:       6:
2249; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2250; CHECK-NEXT:    unreachable
2251; CHECK:       7:
2252; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
2253; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2254; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2255; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
2256; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
2257; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
2258; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
2259; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
2260; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
2261; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
2262; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2263; CHECK-NEXT:    ret <16 x float> [[TMP16]]
2264;
2265  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
2266  %2 = bitcast i16 %mask to <16 x i1>
2267  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
2268  ret <16 x float> %3
2269}
2270
; Same as test_vmulps_mask_rn but with rounding immediate 9 (rd).
2271define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
2272; CHECK-LABEL: @test_vmulps_mask_rd(
2273; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2274; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2275; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2276; CHECK-NEXT:    call void @llvm.donothing()
2277; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2278; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2279; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2280; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2281; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2282; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2283; CHECK:       6:
2284; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2285; CHECK-NEXT:    unreachable
2286; CHECK:       7:
2287; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
2288; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2289; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2290; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
2291; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
2292; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
2293; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
2294; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
2295; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
2296; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
2297; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2298; CHECK-NEXT:    ret <16 x float> [[TMP16]]
2299;
2300  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
2301  %2 = bitcast i16 %mask to <16 x i1>
2302  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
2303  ret <16 x float> %3
2304}
2305
; Same as test_vmulps_mask_rn but with rounding immediate 10 (ru).
2306define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
2307; CHECK-LABEL: @test_vmulps_mask_ru(
2308; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2309; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2310; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2311; CHECK-NEXT:    call void @llvm.donothing()
2312; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2313; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2314; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2315; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2316; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2317; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2318; CHECK:       6:
2319; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2320; CHECK-NEXT:    unreachable
2321; CHECK:       7:
2322; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
2323; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2324; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2325; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
2326; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
2327; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
2328; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
2329; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
2330; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
2331; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
2332; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2333; CHECK-NEXT:    ret <16 x float> [[TMP16]]
2334;
2335  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
2336  %2 = bitcast i16 %mask to <16 x i1>
2337  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
2338  ret <16 x float> %3
2339}
2340
; Same as test_vmulps_mask_rn but with rounding immediate 11 (rz).
2341define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
2342; CHECK-LABEL: @test_vmulps_mask_rz(
2343; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2344; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2345; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2346; CHECK-NEXT:    call void @llvm.donothing()
2347; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2348; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2349; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2350; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2351; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2352; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2353; CHECK:       6:
2354; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2355; CHECK-NEXT:    unreachable
2356; CHECK:       7:
2357; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
2358; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2359; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2360; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
2361; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
2362; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
2363; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
2364; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
2365; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
2366; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
2367; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2368; CHECK-NEXT:    ret <16 x float> [[TMP16]]
2369;
2370  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
2371  %2 = bitcast i16 %mask to <16 x i1>
2372  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
2373  ret <16 x float> %3
2374}
2375
; Masked variant with a merge passthru, rounding immediate 8 (rn): unlike the
; zeroing form, the shadow of %passthru ([[TMP4]]) flows into both arms of the
; shadow select, so unselected lanes inherit the passthru operand's shadow.
2376define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
2377; CHECK-LABEL: @test_vmulps_mask_passthru_rn(
2378; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2379; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2380; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
2381; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2382; CHECK-NEXT:    call void @llvm.donothing()
2383; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2384; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
2385; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2386; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
2387; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2388; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
2389; CHECK:       7:
2390; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2391; CHECK-NEXT:    unreachable
2392; CHECK:       8:
2393; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
2394; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2395; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2396; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
2397; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
2398; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
2399; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
2400; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
2401; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
2402; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
2403; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
2404; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2405; CHECK-NEXT:    ret <16 x float> [[TMP18]]
2406;
2407  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
2408  %2 = bitcast i16 %mask to <16 x i1>
2409  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
2410  ret <16 x float> %3
2411}
2412
; Same as test_vmulps_mask_passthru_rn but with rounding immediate 9 (rd).
2413define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
2414; CHECK-LABEL: @test_vmulps_mask_passthru_rd(
2415; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2416; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2417; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
2418; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2419; CHECK-NEXT:    call void @llvm.donothing()
2420; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2421; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
2422; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2423; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
2424; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2425; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
2426; CHECK:       7:
2427; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2428; CHECK-NEXT:    unreachable
2429; CHECK:       8:
2430; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
2431; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2432; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2433; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
2434; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
2435; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
2436; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
2437; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
2438; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
2439; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
2440; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
2441; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2442; CHECK-NEXT:    ret <16 x float> [[TMP18]]
2443;
2444  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
2445  %2 = bitcast i16 %mask to <16 x i1>
2446  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
2447  ret <16 x float> %3
2448}
2449
; Same as test_vmulps_mask_passthru_rn but with rounding immediate 10 (ru).
2450define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
2451; CHECK-LABEL: @test_vmulps_mask_passthru_ru(
2452; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2453; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2454; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
2455; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2456; CHECK-NEXT:    call void @llvm.donothing()
2457; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2458; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
2459; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2460; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
2461; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2462; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
2463; CHECK:       7:
2464; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2465; CHECK-NEXT:    unreachable
2466; CHECK:       8:
2467; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
2468; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2469; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2470; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
2471; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
2472; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
2473; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
2474; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
2475; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
2476; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
2477; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
2478; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2479; CHECK-NEXT:    ret <16 x float> [[TMP18]]
2480;
2481  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
2482  %2 = bitcast i16 %mask to <16 x i1>
2483  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
2484  ret <16 x float> %3
2485}
2486
; Same as test_vmulps_mask_passthru_rn but with rounding immediate 11 (rz).
2487define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
2488; CHECK-LABEL: @test_vmulps_mask_passthru_rz(
2489; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2490; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2491; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
2492; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2493; CHECK-NEXT:    call void @llvm.donothing()
2494; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2495; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
2496; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2497; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
2498; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2499; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
2500; CHECK:       7:
2501; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2502; CHECK-NEXT:    unreachable
2503; CHECK:       8:
2504; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
2505; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2506; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2507; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
2508; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
2509; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
2510; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
2511; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
2512; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
2513; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
2514; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
2515; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2516; CHECK-NEXT:    ret <16 x float> [[TMP18]]
2517;
2518  %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
2519  %2 = bitcast i16 %mask to <16 x i1>
2520  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
2521  ret <16 x float> %3
2522}
2523
; MSan test: @llvm.x86.avx512.mul.pd.512 with rounding operand i32 8 ("rn" per
; the test name), result zero-masked (select against zeroinitializer).
; Autogenerated CHECK lines -- regenerate with update_test_checks.py.
2524define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
2525; CHECK-LABEL: @test_vmulpd_mask_rn(
2526; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
2527; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2528; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2529; CHECK-NEXT:    call void @llvm.donothing()
2530; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
2531; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2532; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
2533; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2534; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2535; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2536; CHECK:       6:
2537; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2538; CHECK-NEXT:    unreachable
2539; CHECK:       7:
2540; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 8)
2541; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
2542; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2543; CHECK-NEXT:    [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
2544; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
2545; CHECK-NEXT:    [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
2546; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
2547; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
2548; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
2549; CHECK-NEXT:    [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
2550; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2551; CHECK-NEXT:    ret <8 x double> [[TMP16]]
2552;
2553  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 8)
2554  %2 = bitcast i8 %mask to <8 x i1>
2555  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
2556  ret <8 x double> %3
2557}
2558
; MSan test: @llvm.x86.avx512.mul.pd.512 with rounding operand i32 9 ("rd" per
; the test name), result zero-masked. Autogenerated CHECK lines -- regenerate
; with update_test_checks.py rather than editing by hand.
2559define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
2560; CHECK-LABEL: @test_vmulpd_mask_rd(
2561; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
2562; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2563; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2564; CHECK-NEXT:    call void @llvm.donothing()
2565; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
2566; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2567; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
2568; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2569; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2570; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2571; CHECK:       6:
2572; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2573; CHECK-NEXT:    unreachable
2574; CHECK:       7:
2575; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 9)
2576; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
2577; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2578; CHECK-NEXT:    [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
2579; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
2580; CHECK-NEXT:    [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
2581; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
2582; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
2583; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
2584; CHECK-NEXT:    [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
2585; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2586; CHECK-NEXT:    ret <8 x double> [[TMP16]]
2587;
2588  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 9)
2589  %2 = bitcast i8 %mask to <8 x i1>
2590  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
2591  ret <8 x double> %3
2592}
2593
; MSan test: @llvm.x86.avx512.mul.pd.512 with rounding operand i32 10 ("ru" per
; the test name), result zero-masked. Autogenerated CHECK lines -- regenerate
; with update_test_checks.py rather than editing by hand.
2594define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
2595; CHECK-LABEL: @test_vmulpd_mask_ru(
2596; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
2597; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2598; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2599; CHECK-NEXT:    call void @llvm.donothing()
2600; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
2601; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2602; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
2603; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2604; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2605; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2606; CHECK:       6:
2607; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2608; CHECK-NEXT:    unreachable
2609; CHECK:       7:
2610; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 10)
2611; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
2612; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2613; CHECK-NEXT:    [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
2614; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
2615; CHECK-NEXT:    [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
2616; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
2617; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
2618; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
2619; CHECK-NEXT:    [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
2620; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2621; CHECK-NEXT:    ret <8 x double> [[TMP16]]
2622;
2623  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 10)
2624  %2 = bitcast i8 %mask to <8 x i1>
2625  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
2626  ret <8 x double> %3
2627}
2628
; MSan test: @llvm.x86.avx512.mul.pd.512 with rounding operand i32 11 ("rz" per
; the test name), result zero-masked. Autogenerated CHECK lines -- regenerate
; with update_test_checks.py rather than editing by hand.
2629define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
2630; CHECK-LABEL: @test_vmulpd_mask_rz(
2631; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
2632; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2633; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2634; CHECK-NEXT:    call void @llvm.donothing()
2635; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
2636; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2637; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
2638; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2639; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2640; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2641; CHECK:       6:
2642; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2643; CHECK-NEXT:    unreachable
2644; CHECK:       7:
2645; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 11)
2646; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
2647; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2648; CHECK-NEXT:    [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
2649; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
2650; CHECK-NEXT:    [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
2651; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
2652; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
2653; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
2654; CHECK-NEXT:    [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
2655; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2656; CHECK-NEXT:    ret <8 x double> [[TMP16]]
2657;
2658  %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 11)
2659  %2 = bitcast i8 %mask to <8 x i1>
2660  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
2661  ret <8 x double> %3
2662}
2663
; MSan test: @llvm.x86.avx512.add.ps.512 with rounding operand i32 8 ("rn_sae"
; per the test name), result zero-masked. Autogenerated CHECK lines --
; regenerate with update_test_checks.py rather than editing by hand.
2664define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
2665; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rn_sae(
2666; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2667; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2668; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2669; CHECK-NEXT:    call void @llvm.donothing()
2670; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2671; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2672; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2673; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2674; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2675; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2676; CHECK:       6:
2677; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2678; CHECK-NEXT:    unreachable
2679; CHECK:       7:
2680; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
2681; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2682; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2683; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
2684; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
2685; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
2686; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
2687; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
2688; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
2689; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
2690; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2691; CHECK-NEXT:    ret <16 x float> [[TMP16]]
2692;
2693  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
2694  %2 = bitcast i16 %mask to <16 x i1>
2695  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
2696  ret <16 x float> %3
2697}
2698
; MSan test: @llvm.x86.avx512.add.ps.512 with rounding operand i32 9 ("rd_sae"
; per the test name), result zero-masked. Autogenerated CHECK lines --
; regenerate with update_test_checks.py rather than editing by hand.
2699define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
2700; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rd_sae(
2701; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2702; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2703; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2704; CHECK-NEXT:    call void @llvm.donothing()
2705; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2706; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2707; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2708; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2709; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2710; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2711; CHECK:       6:
2712; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2713; CHECK-NEXT:    unreachable
2714; CHECK:       7:
2715; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
2716; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2717; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2718; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
2719; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
2720; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
2721; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
2722; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
2723; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
2724; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
2725; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2726; CHECK-NEXT:    ret <16 x float> [[TMP16]]
2727;
2728  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
2729  %2 = bitcast i16 %mask to <16 x i1>
2730  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
2731  ret <16 x float> %3
2732}
2733
; MSan test: @llvm.x86.avx512.add.ps.512 with rounding operand i32 10 ("ru_sae"
; per the test name), result zero-masked. Autogenerated CHECK lines --
; regenerate with update_test_checks.py rather than editing by hand.
2734define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
2735; CHECK-LABEL: @test_mm512_maskz_add_round_ps_ru_sae(
2736; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2737; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2738; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2739; CHECK-NEXT:    call void @llvm.donothing()
2740; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2741; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2742; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2743; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2744; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2745; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2746; CHECK:       6:
2747; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2748; CHECK-NEXT:    unreachable
2749; CHECK:       7:
2750; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
2751; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2752; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2753; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
2754; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
2755; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
2756; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
2757; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
2758; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
2759; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
2760; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2761; CHECK-NEXT:    ret <16 x float> [[TMP16]]
2762;
2763  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
2764  %2 = bitcast i16 %mask to <16 x i1>
2765  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
2766  ret <16 x float> %3
2767}
2768
; MSan test: @llvm.x86.avx512.add.ps.512 with rounding operand i32 11 ("rz_sae"
; per the test name), result zero-masked. Autogenerated CHECK lines --
; regenerate with update_test_checks.py rather than editing by hand.
2769define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
2770; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rz_sae(
2771; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2772; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2773; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2774; CHECK-NEXT:    call void @llvm.donothing()
2775; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2776; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2777; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2778; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2779; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2780; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2781; CHECK:       6:
2782; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2783; CHECK-NEXT:    unreachable
2784; CHECK:       7:
2785; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
2786; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2787; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2788; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
2789; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
2790; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
2791; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
2792; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
2793; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
2794; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
2795; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2796; CHECK-NEXT:    ret <16 x float> [[TMP16]]
2797;
2798  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
2799  %2 = bitcast i16 %mask to <16 x i1>
2800  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
2801  ret <16 x float> %3
2802}
2803
; MSan test: @llvm.x86.avx512.add.ps.512 with rounding operand i32 4 ("current"
; per the test name), result zero-masked. Autogenerated CHECK lines --
; regenerate with update_test_checks.py rather than editing by hand.
2804define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
2805; CHECK-LABEL: @test_mm512_maskz_add_round_ps_current(
2806; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2807; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2808; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2809; CHECK-NEXT:    call void @llvm.donothing()
2810; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2811; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
2812; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2813; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
2814; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2815; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2816; CHECK:       6:
2817; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2818; CHECK-NEXT:    unreachable
2819; CHECK:       7:
2820; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
2821; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2822; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2823; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
2824; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
2825; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
2826; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
2827; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
2828; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
2829; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
2830; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2831; CHECK-NEXT:    ret <16 x float> [[TMP16]]
2832;
2833  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
2834  %2 = bitcast i16 %mask to <16 x i1>
2835  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
2836  ret <16 x float> %3
2837}
2838
; MSan test: @llvm.x86.avx512.add.ps.512 with rounding operand i32 8 ("rn_sae"
; per the test name), result merge-masked into %src, so %src's shadow (TMP4)
; feeds the propagated select shadow. Autogenerated CHECK lines -- regenerate
; with update_test_checks.py rather than editing by hand.
2839define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
2840; CHECK-LABEL: @test_mm512_mask_add_round_ps_rn_sae(
2841; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2842; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2843; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
2844; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2845; CHECK-NEXT:    call void @llvm.donothing()
2846; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2847; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
2848; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2849; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
2850; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2851; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
2852; CHECK:       7:
2853; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2854; CHECK-NEXT:    unreachable
2855; CHECK:       8:
2856; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
2857; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2858; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2859; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
2860; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
2861; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
2862; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
2863; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
2864; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
2865; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
2866; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
2867; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2868; CHECK-NEXT:    ret <16 x float> [[TMP18]]
2869;
2870  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
2871  %2 = bitcast i16 %mask to <16 x i1>
2872  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
2873  ret <16 x float> %3
2874}
2875
; MSan test: @llvm.x86.avx512.add.ps.512 with rounding operand i32 9 ("rd_sae"
; per the test name), result merge-masked into %src. Autogenerated CHECK lines
; -- regenerate with update_test_checks.py rather than editing by hand.
2876define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
2877; CHECK-LABEL: @test_mm512_mask_add_round_ps_rd_sae(
2878; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2879; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2880; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
2881; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2882; CHECK-NEXT:    call void @llvm.donothing()
2883; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2884; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
2885; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2886; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
2887; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2888; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
2889; CHECK:       7:
2890; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2891; CHECK-NEXT:    unreachable
2892; CHECK:       8:
2893; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
2894; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2895; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2896; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
2897; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
2898; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
2899; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
2900; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
2901; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
2902; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
2903; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
2904; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2905; CHECK-NEXT:    ret <16 x float> [[TMP18]]
2906;
2907  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
2908  %2 = bitcast i16 %mask to <16 x i1>
2909  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
2910  ret <16 x float> %3
2911}
2912
; MSan test: @llvm.x86.avx512.add.ps.512 with rounding operand i32 10 ("ru_sae"
; per the test name), result merge-masked into %src. Autogenerated CHECK lines
; -- regenerate with update_test_checks.py rather than editing by hand.
2913define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
2914; CHECK-LABEL: @test_mm512_mask_add_round_ps_ru_sae(
2915; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
2916; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
2917; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
2918; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
2919; CHECK-NEXT:    call void @llvm.donothing()
2920; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
2921; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
2922; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
2923; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
2924; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2925; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
2926; CHECK:       7:
2927; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
2928; CHECK-NEXT:    unreachable
2929; CHECK:       8:
2930; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
2931; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
2932; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2933; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
2934; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
2935; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
2936; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
2937; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
2938; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
2939; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
2940; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
2941; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
2942; CHECK-NEXT:    ret <16 x float> [[TMP18]]
2943;
2944  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
2945  %2 = bitcast i16 %mask to <16 x i1>
2946  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
2947  ret <16 x float> %3
2948}
2949
; Masked add.ps.512, round-toward-zero + SAE (rounding mode 11). MSan fully
; checks both vector operand shadows (i512 compare + warning branch), then
; propagates the blend's shadow with a shadow select (_MSPROP_SELECT) mixing
; the result/src shadows under both the real mask and the mask's shadow.
define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_add_round_ps_rz_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
2986
; Masked add.ps.512 with the "current" rounding mode (immediate 4). Same MSan
; pattern as the other masked-add tests: strict i512 operand-shadow check, then
; shadow propagation through the mask select (_MSPROP_SELECT).
define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_add_round_ps_current(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
3023
; Unmasked add.ps.512, round-to-nearest + SAE (rounding mode 8). MSan treats
; the rounded intrinsic as strict: both operand shadows must be clean (warning
; branch otherwise) and the result shadow is stored as all-zeros.
define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_add_round_ps_rn_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
  ret <16 x float> %1
}
3046
; Unmasked add.ps.512, round-down + SAE (rounding mode 9): strict operand
; shadow check, clean (zero) return shadow.
define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_add_round_ps_rd_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
  ret <16 x float> %1
}
3069
; Unmasked add.ps.512, round-up + SAE (rounding mode 10): strict operand
; shadow check, clean (zero) return shadow.
define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_add_round_ps_ru_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
  ret <16 x float> %1
}
3092
; Unmasked add.ps.512, round-toward-zero + SAE (rounding mode 11): strict
; operand shadow check, clean (zero) return shadow.
define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_add_round_ps_rz_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
  ret <16 x float> %1
}
3115
; Unmasked add.ps.512 with the "current" rounding mode (immediate 4): strict
; operand shadow check, clean (zero) return shadow.
define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_add_round_ps_current(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
  ret <16 x float> %1
}
3138declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32)
3139
; Masked sub.ps.512, round-to-nearest + SAE (rounding mode 8): strict i512
; operand-shadow check, then shadow propagation through the mask select
; (_MSPROP_SELECT), mirroring the masked-add tests above.
define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rn_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
3176
; Masked sub.ps.512, round-down + SAE (rounding mode 9): strict operand-shadow
; check, then mask-select shadow propagation (_MSPROP_SELECT).
define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rd_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
3213
; Masked sub.ps.512, round-up + SAE (rounding mode 10): strict operand-shadow
; check, then mask-select shadow propagation (_MSPROP_SELECT).
define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_sub_round_ps_ru_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
3250
; Masked sub.ps.512, round-toward-zero + SAE (rounding mode 11): strict
; operand-shadow check, then mask-select shadow propagation (_MSPROP_SELECT).
define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rz_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
3287
; Masked sub.ps.512 with the "current" rounding mode (immediate 4): strict
; operand-shadow check, then mask-select shadow propagation (_MSPROP_SELECT).
define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_sub_round_ps_current(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
3324
; Unmasked sub.ps.512, round-to-nearest + SAE (rounding mode 8): strict
; operand shadow check, clean (zero) return shadow.
define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_sub_round_ps_rn_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
  ret <16 x float> %1
}
3347
; Unmasked sub.ps.512, round-down + SAE (rounding mode 9): strict operand
; shadow check, clean (zero) return shadow.
define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_sub_round_ps_rd_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
  ret <16 x float> %1
}
3370
; Unmasked sub.ps.512, round-up + SAE (rounding mode 10): strict operand
; shadow check, clean (zero) return shadow.
define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_sub_round_ps_ru_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
  ret <16 x float> %1
}
3393
; Unmasked sub.ps.512, round-toward-zero + SAE (rounding mode 11): strict
; operand shadow check, clean (zero) return shadow.
define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_sub_round_ps_rz_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
  ret <16 x float> %1
}
3416
; Unmasked AVX-512 sub with rounding imm 4 (current/default rounding, per the
; test name). MSan OR-reduces both operand shadows into one warning branch and
; stores an all-zero return shadow; %mask is intentionally unused.
define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_sub_round_ps_current(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
  ret <16 x float> %1
}
3439
; Zero-masked AVX-512 div, rounding imm 8 (round-to-nearest + SAE, per the test
; name). Operand shadows gate a warning branch; the return shadow is a select
; keyed on the shadow of %mask ([[TMP9]]): poisoned mask bits take the
; xor/or-combined result shadow, clean bits take the maskz (zero) shadow.
define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rn_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP16]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}
3474
; Zero-masked AVX-512 div, rounding imm 9 (round-down + SAE, per the test
; name). Operand shadows gate a warning branch; the return shadow selects on
; the shadow of %mask between the result shadow and the maskz zero shadow.
define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rd_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP16]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}
3509
; Zero-masked AVX-512 div, rounding imm 10 (round-up + SAE, per the test
; name). Operand shadows gate a warning branch; the return shadow selects on
; the shadow of %mask between the result shadow and the maskz zero shadow.
define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_maskz_div_round_ps_ru_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP16]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}
3544
; Zero-masked AVX-512 div, rounding imm 11 (round-toward-zero + SAE, per the
; test name). Operand shadows gate a warning branch; the return shadow selects
; on the shadow of %mask between the result shadow and the maskz zero shadow.
define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rz_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP16]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}
3579
; Zero-masked AVX-512 div, rounding imm 4 (current/default rounding, per the
; test name). Operand shadows gate a warning branch; the return shadow selects
; on the shadow of %mask between the result shadow and the maskz zero shadow.
define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_maskz_div_round_ps_current(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP16]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}
3614
; Merge-masked AVX-512 div, rounding imm 8 (round-to-nearest + SAE, per the
; test name). Unlike the maskz variant, the passthrough %src shadow ([[TMP4]])
; feeds both select arms: it is the shadow for unselected lanes and is OR-ed
; into the "mask bit unknown" shadow via xor(result, src) | src.
define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_div_round_ps_rn_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
3651
; Merge-masked AVX-512 div, rounding imm 9 (round-down + SAE, per the test
; name). The passthrough %src shadow ([[TMP4]]) covers unselected lanes and is
; folded into the shadow for lanes whose mask bit is itself uninitialized.
define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_div_round_ps_rd_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
3688
; Merge-masked AVX-512 div, rounding imm 10 (round-up + SAE, per the test
; name). The passthrough %src shadow ([[TMP4]]) covers unselected lanes and is
; folded into the shadow for lanes whose mask bit is itself uninitialized.
define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_div_round_ps_ru_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
3725
; Merge-masked AVX-512 div, rounding imm 11 (round-toward-zero + SAE, per the
; test name). The passthrough %src shadow ([[TMP4]]) covers unselected lanes
; and is folded into the shadow for lanes whose mask bit is uninitialized.
define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_div_round_ps_rz_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
3762
; Merge-masked AVX-512 div, rounding imm 4 (current/default rounding, per the
; test name). The passthrough %src shadow ([[TMP4]]) covers unselected lanes
; and is folded into the shadow for lanes whose mask bit is uninitialized.
define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_mask_div_round_ps_current(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
  %2 = bitcast i16 %mask to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
  ret <16 x float> %3
}
3799
; Unmasked AVX-512 div with rounding imm 8 (round-to-nearest + SAE, per the
; test name). MSan OR-reduces both operand shadows into one warning branch and
; stores an all-zero return shadow; %mask is intentionally unused.
define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_div_round_ps_rn_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
  ret <16 x float> %1
}
3822
; Unmasked AVX-512 div with rounding imm 9 (round-down + SAE, per the test
; name). MSan OR-reduces both operand shadows into one warning branch and
; stores an all-zero return shadow; %mask is intentionally unused.
define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_div_round_ps_rd_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
  ret <16 x float> %1
}
3845
; Unmasked AVX-512 div with rounding imm 10 (round-up + SAE, per the test
; name). MSan OR-reduces both operand shadows into one warning branch and
; stores an all-zero return shadow; %mask is intentionally unused.
define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_mm512_div_round_ps_ru_sae(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
  ret <16 x float> %1
}
3868
3869define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
3870; CHECK-LABEL: @test_mm512_div_round_ps_rz_sae(
3871; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
3872; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
3873; CHECK-NEXT:    call void @llvm.donothing()
3874; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
3875; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
3876; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
3877; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
3878; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
3879; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
3880; CHECK:       5:
3881; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
3882; CHECK-NEXT:    unreachable
3883; CHECK:       6:
3884; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
3885; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
3886; CHECK-NEXT:    ret <16 x float> [[TMP7]]
3887;
3888  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
3889  ret <16 x float> %1
3890}
3891
3892define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
3893; CHECK-LABEL: @test_mm512_div_round_ps_current(
3894; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
3895; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
3896; CHECK-NEXT:    call void @llvm.donothing()
3897; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
3898; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
3899; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
3900; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
3901; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
3902; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
3903; CHECK:       5:
3904; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
3905; CHECK-NEXT:    unreachable
3906; CHECK:       6:
3907; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
3908; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
3909; CHECK-NEXT:    ret <16 x float> [[TMP7]]
3910;
3911  %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
3912  ret <16 x float> %1
3913}
3914declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32)
3915
; --- llvm.x86.avx512.min.ps.512 (shadow-propagating handling) ---
; Unlike div.ps.512 above, min.ps.512 propagates shadow instead of checking it:
; the operand shadows are OR-ed (_MSPROP/_MSPROP1) and carried to the result.
; For the masked variants, the select on the IR-level mask is instrumented by
; selecting between the propagated shadow and the passthrough shadow, with
; extra shadow (xor/or of the two data values) where the mask bits themselves
; are uninitialized (TMP9 = bitcast of the mask's shadow).

; Zero-masking + SAE (rounding immediate 8): passthrough is zeroinitializer.
3916define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
3917; CHECK-LABEL: @test_mm512_maskz_min_round_ps_sae(
3918; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
3919; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
3920; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
3921; CHECK-NEXT:    call void @llvm.donothing()
3922; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
3923; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
3924; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
3925; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
3926; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3927; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
3928; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
3929; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
3930; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
3931; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
3932; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
3933; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
3934; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
3935; CHECK-NEXT:    ret <16 x float> [[TMP16]]
3936;
3937  %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
3938  %2 = bitcast i16 %mask to <16 x i1>
3939  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
3940  ret <16 x float> %3
3941}
3942
; Zero-masking + current rounding (immediate 4).
3943define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
3944; CHECK-LABEL: @test_mm512_maskz_min_round_ps_current(
3945; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
3946; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
3947; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
3948; CHECK-NEXT:    call void @llvm.donothing()
3949; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
3950; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
3951; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
3952; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
3953; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3954; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
3955; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
3956; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
3957; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
3958; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
3959; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
3960; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
3961; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
3962; CHECK-NEXT:    ret <16 x float> [[TMP16]]
3963;
3964  %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
3965  %2 = bitcast i16 %mask to <16 x i1>
3966  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
3967  ret <16 x float> %3
3968}
3969
; Merge-masking + SAE: passthrough is %src, so its shadow (TMP4, loaded from
; param TLS offset 128) feeds the select instrumentation.
3970define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
3971; CHECK-LABEL: @test_mm512_mask_min_round_ps_sae(
3972; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
3973; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
3974; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
3975; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
3976; CHECK-NEXT:    call void @llvm.donothing()
3977; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
3978; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
3979; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
3980; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
3981; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3982; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
3983; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
3984; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
3985; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
3986; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
3987; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
3988; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
3989; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
3990; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
3991; CHECK-NEXT:    ret <16 x float> [[TMP18]]
3992;
3993  %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
3994  %2 = bitcast i16 %mask to <16 x i1>
3995  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
3996  ret <16 x float> %3
3997}
3998
; Merge-masking + current rounding (immediate 4).
3999define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
4000; CHECK-LABEL: @test_mm512_mask_min_round_ps_current(
4001; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
4002; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
4003; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
4004; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
4005; CHECK-NEXT:    call void @llvm.donothing()
4006; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
4007; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
4008; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
4009; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
4010; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
4011; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
4012; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
4013; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
4014; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
4015; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
4016; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
4017; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
4018; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
4019; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
4020; CHECK-NEXT:    ret <16 x float> [[TMP18]]
4021;
4022  %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
4023  %2 = bitcast i16 %mask to <16 x i1>
4024  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
4025  ret <16 x float> %3
4026}
4027
; Unmasked + SAE: only shadow propagation, no warning branch and no select.
4028define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
4029; CHECK-LABEL: @test_mm512_min_round_ps_sae(
4030; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
4031; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
4032; CHECK-NEXT:    call void @llvm.donothing()
4033; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
4034; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
4035; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
4036; CHECK-NEXT:    store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
4037; CHECK-NEXT:    ret <16 x float> [[TMP7]]
4038;
4039  %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
4040  ret <16 x float> %1
4041}
4042
; Unmasked + current rounding (immediate 4).
4043define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
4044; CHECK-LABEL: @test_mm512_min_round_ps_current(
4045; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
4046; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
4047; CHECK-NEXT:    call void @llvm.donothing()
4048; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
4049; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
4050; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
4051; CHECK-NEXT:    store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
4052; CHECK-NEXT:    ret <16 x float> [[TMP7]]
4053;
4054  %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
4055  ret <16 x float> %1
4056}
4057declare <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float>, <16 x float>, i32)
4058
; --- llvm.x86.avx512.max.ps.512 ---
; Same shadow-propagating instrumentation pattern as the min.ps.512 tests
; above: operand shadows are OR-ed into the result shadow, and the masked
; variants instrument the IR select using both the mask's shadow and the mask
; value.

; Zero-masking + SAE (rounding immediate 8).
4059define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
4060; CHECK-LABEL: @test_mm512_maskz_max_round_ps_sae(
4061; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
4062; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
4063; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
4064; CHECK-NEXT:    call void @llvm.donothing()
4065; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
4066; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
4067; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
4068; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
4069; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
4070; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
4071; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
4072; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
4073; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
4074; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
4075; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
4076; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
4077; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
4078; CHECK-NEXT:    ret <16 x float> [[TMP16]]
4079;
4080  %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
4081  %2 = bitcast i16 %mask to <16 x i1>
4082  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
4083  ret <16 x float> %3
4084}
4085
; Zero-masking + current rounding (immediate 4).
4086define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
4087; CHECK-LABEL: @test_mm512_maskz_max_round_ps_current(
4088; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
4089; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
4090; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
4091; CHECK-NEXT:    call void @llvm.donothing()
4092; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
4093; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
4094; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
4095; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
4096; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
4097; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
4098; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
4099; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
4100; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
4101; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
4102; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
4103; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
4104; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
4105; CHECK-NEXT:    ret <16 x float> [[TMP16]]
4106;
4107  %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
4108  %2 = bitcast i16 %mask to <16 x i1>
4109  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
4110  ret <16 x float> %3
4111}
4112
; Merge-masking + SAE: %src shadow (TMP4) is the select's false-side shadow.
4113define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
4114; CHECK-LABEL: @test_mm512_mask_max_round_ps_sae(
4115; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
4116; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
4117; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
4118; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
4119; CHECK-NEXT:    call void @llvm.donothing()
4120; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
4121; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
4122; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
4123; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
4124; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
4125; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
4126; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
4127; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
4128; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
4129; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
4130; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
4131; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
4132; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
4133; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
4134; CHECK-NEXT:    ret <16 x float> [[TMP18]]
4135;
4136  %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
4137  %2 = bitcast i16 %mask to <16 x i1>
4138  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
4139  ret <16 x float> %3
4140}
4141
; Merge-masking + current rounding (immediate 4).
4142define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
4143; CHECK-LABEL: @test_mm512_mask_max_round_ps_current(
4144; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
4145; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
4146; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
4147; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
4148; CHECK-NEXT:    call void @llvm.donothing()
4149; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
4150; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
4151; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
4152; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
4153; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
4154; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
4155; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
4156; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
4157; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
4158; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
4159; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
4160; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
4161; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
4162; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
4163; CHECK-NEXT:    ret <16 x float> [[TMP18]]
4164;
4165  %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
4166  %2 = bitcast i16 %mask to <16 x i1>
4167  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
4168  ret <16 x float> %3
4169}
4170
; Unmasked + SAE: only shadow propagation, no warning branch and no select.
4171define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
4172; CHECK-LABEL: @test_mm512_max_round_ps_sae(
4173; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
4174; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
4175; CHECK-NEXT:    call void @llvm.donothing()
4176; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
4177; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
4178; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
4179; CHECK-NEXT:    store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
4180; CHECK-NEXT:    ret <16 x float> [[TMP7]]
4181;
4182  %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
4183  ret <16 x float> %1
4184}
4185
; Unmasked + current rounding (immediate 4).
4186define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
4187; CHECK-LABEL: @test_mm512_max_round_ps_current(
4188; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
4189; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
4190; CHECK-NEXT:    call void @llvm.donothing()
4191; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
4192; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
4193; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
4194; CHECK-NEXT:    store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
4195; CHECK-NEXT:    ret <16 x float> [[TMP7]]
4196;
4197  %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
4198  ret <16 x float> %1
4199}
4200declare <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float>, <16 x float>, i32)
4201
; --- llvm.x86.avx512.mask.add.ss.round (scalar, mask passed to intrinsic) ---
; Here the mask is an operand of the intrinsic itself, so MSan uses strict
; handling: the three <4 x i32> vector shadows are collapsed to i128 checks,
; the i8 mask shadow is checked directly, all checks are OR-ed, and any
; non-zero shadow branches to __msan_warning_noreturn. The return shadow is
; zeroinitializer. Functions differ only in the rounding immediate.
4202declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone

; Rounding immediate 8 (round to nearest, SAE).
4204define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
4205; CHECK-LABEL: @test_mask_add_ss_rn(
4206; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
4207; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4208; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
4209; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
4210; CHECK-NEXT:    call void @llvm.donothing()
4211; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4212; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
4213; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4214; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
4215; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4216; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4217; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
4218; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
4219; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
4220; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
4221; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4222; CHECK:       8:
4223; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4224; CHECK-NEXT:    unreachable
4225; CHECK:       9:
4226; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
4227; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4228; CHECK-NEXT:    ret <4 x float> [[RES]]
4229;
4230  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
4231  ret <4 x float> %res
4232}
4233
; Rounding immediate 9 (round toward -inf, SAE).
4234define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
4235; CHECK-LABEL: @test_mask_add_ss_rd(
4236; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
4237; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4238; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
4239; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
4240; CHECK-NEXT:    call void @llvm.donothing()
4241; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4242; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
4243; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4244; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
4245; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4246; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4247; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
4248; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
4249; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
4250; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
4251; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4252; CHECK:       8:
4253; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4254; CHECK-NEXT:    unreachable
4255; CHECK:       9:
4256; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 9)
4257; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4258; CHECK-NEXT:    ret <4 x float> [[RES]]
4259;
4260  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9)
4261  ret <4 x float> %res
4262}
4263
; Rounding immediate 10 (round toward +inf, SAE).
4264define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
4265; CHECK-LABEL: @test_mask_add_ss_ru(
4266; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
4267; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4268; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
4269; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
4270; CHECK-NEXT:    call void @llvm.donothing()
4271; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4272; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
4273; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4274; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
4275; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4276; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4277; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
4278; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
4279; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
4280; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
4281; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4282; CHECK:       8:
4283; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4284; CHECK-NEXT:    unreachable
4285; CHECK:       9:
4286; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 10)
4287; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4288; CHECK-NEXT:    ret <4 x float> [[RES]]
4289;
4290  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 10)
4291  ret <4 x float> %res
4292}
4293
; Rounding immediate 11 (round toward zero, SAE).
4294define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
4295; CHECK-LABEL: @test_mask_add_ss_rz(
4296; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
4297; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4298; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
4299; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
4300; CHECK-NEXT:    call void @llvm.donothing()
4301; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4302; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
4303; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4304; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
4305; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4306; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4307; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
4308; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
4309; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
4310; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
4311; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4312; CHECK:       8:
4313; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4314; CHECK-NEXT:    unreachable
4315; CHECK:       9:
4316; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 11)
4317; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4318; CHECK-NEXT:    ret <4 x float> [[RES]]
4319;
4320  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 11)
4321  ret <4 x float> %res
4322}
4323
; Same masked add.ss shape but with rounding immediate 4 ("current" per the
; test name, i.e. no static rounding override -- TODO confirm against the
; intrinsic spec). Instrumentation is identical to the rz variant: four TLS
; shadow loads, OR-chained non-zero checks, one warning branch, clean retval.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_add_ss_current(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
  ret <4 x float> %res
}
4353
; Zero-masked ("maskz") variant: the passthru operand is a zeroinitializer
; constant, so only three TLS shadows (%a0, %a1, %mask) are loaded and
; OR-combined -- the constant contributes no shadow check.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
; CHECK-LABEL: @test_maskz_add_ss_rn(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
  ret <4 x float> %res
}
4379
; Unmasked variant: mask is the constant -1 and passthru is zeroinitializer,
; so only the two vector argument shadows are loaded and checked.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: @test_add_ss_rn(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8)
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
  ret <4 x float> %res
}
4402
; Memory-operand-folding variant: the second operand is built by loading a
; scalar float through %a1 and inserting it into %extra_param. MSan first
; checks the pointer's own shadow (warning branch before the load), then
; computes the loaded value's shadow via the shadow-memory mapping
; (ptr XOR 0x500000000000) and propagates it element-wise through the
; insertelement chain before the usual pre-call argument checks.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, ptr %a1, <4 x float> %a2, i8 %mask, <4 x float> %extra_param) #0 {
; CHECK-LABEL: @test_mask_add_ss_current_memfold(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[_MSLD]], i32 0
; CHECK-NEXT:    [[A1V0:%.*]] = insertelement <4 x float> [[EXTRA_PARAM:%.*]], float [[A1_VAL]], i32 0
; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
; CHECK-NEXT:    [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
; CHECK-NEXT:    [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
; CHECK-NEXT:    [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP10]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i128 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP12]], 0
; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
; CHECK:       14:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       15:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %a1.val = load float, ptr %a1
  %a1v0 = insertelement <4 x float> %extra_param, float %a1.val, i32 0
  %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
  %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
  %a1v  = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4)
  ret <4 x float> %res
}
4457
; Zero-masked memory-folding variant: like the masked memfold test, the
; pointer shadow is checked before the scalar load, the loaded value's shadow
; comes from shadow memory (ptr XOR 0x500000000000), and it is propagated
; through the insertelement chain; the zeroinitializer passthru adds no check.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <4 x float> @test_maskz_add_ss_current_memfold(<4 x float> %a0, ptr %a1, i8 %mask, <4 x float> %extra_param) #0 {
; CHECK-LABEL: @test_maskz_add_ss_current_memfold(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[_MSLD]], i32 0
; CHECK-NEXT:    [[A1V0:%.*]] = insertelement <4 x float> [[EXTRA_PARAM:%.*]], float [[A1_VAL]], i32 0
; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
; CHECK-NEXT:    [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
; CHECK-NEXT:    [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
; CHECK-NEXT:    [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i128 [[TMP10]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
; CHECK-NEXT:    br i1 [[_MSOR7]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
; CHECK:       12:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       13:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %a1.val = load float, ptr %a1
  %a1v0 = insertelement <4 x float> %extra_param, float %a1.val, i32 0
  %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
  %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
  %a1v  = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4)
  ret <4 x float> %res
}
4508
4509declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
4510
; Scalar-double (add.sd) counterpart of the masked add.ss tests, rounding
; immediate 8 ("rn" per the rn=8/rd=9/ru=10/rz=11 naming pattern). Shadows are
; <2 x i64> here; otherwise the instrumentation shape is the same.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_add_sd_rn(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
  ret <2 x double> %res
}
4540
; Masked add.sd with rounding immediate 9 ("rd" per the naming pattern);
; instrumentation is identical to the rn variant apart from the immediate.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_add_sd_rd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 9)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9)
  ret <2 x double> %res
}
4570
; Masked add.sd with rounding immediate 10 ("ru" per the naming pattern);
; instrumentation is identical to the rn variant apart from the immediate.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_add_sd_ru(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 10)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 10)
  ret <2 x double> %res
}
4600
; Masked add.sd with rounding immediate 11 ("rz" per the naming pattern);
; instrumentation is identical to the rn variant apart from the immediate.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_add_sd_rz(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 11)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 11)
  ret <2 x double> %res
}
4630
; Masked add.sd with rounding immediate 4 ("current" per the test name --
; TODO confirm against the intrinsic spec); instrumentation matches the
; other masked sd variants.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
; CHECK-LABEL: @test_mask_add_sd_current(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
  ret <2 x double> %res
}
4660
; Zero-masked add.sd: zeroinitializer passthru contributes no shadow check,
; so only %a0, %a1 and %mask shadows are loaded and OR-combined.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
; CHECK-LABEL: @test_maskz_add_sd_rn(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
  ret <2 x double> %res
}
4686
; Unmasked add.sd: constant mask (-1) and zeroinitializer passthru, so only
; the two vector argument shadows are loaded and checked.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_add_sd_rn(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 8)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
  ret <2 x double> %res
}
4709
; Memory-operand-folding variant for add.sd: a scalar double is loaded
; through %a1 and inserted into %extra_param (two insertelements for the
; <2 x double>). MSan checks the pointer shadow before the load, derives the
; loaded value's shadow from shadow memory (ptr XOR 0x500000000000), and
; propagates it through the insertelement chain before the pre-call checks.
; CHECK lines are autogenerated -- regenerate with utils/update_test_checks.py.
define <2 x double> @test_mask_add_sd_current_memfold(<2 x double> %a0, ptr %a1, <2 x double> %a2, i8 %mask, <2 x double> %extra_param) #0 {
; CHECK-LABEL: @test_mask_add_sd_current_memfold(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8
; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[_MSLD]], i32 0
; CHECK-NEXT:    [[A1V0:%.*]] = insertelement <2 x double> [[EXTRA_PARAM:%.*]], double [[A1_VAL]], i32 0
; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
; CHECK-NEXT:    [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP12]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
; CHECK-NEXT:    br i1 [[_MSOR7]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
; CHECK:       14:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       15:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %a1.val = load double, ptr %a1
  %a1v0 = insertelement <2 x double> %extra_param, double %a1.val, i32 0
  %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4)
  ret <2 x double> %res
}
4758
; Zero-masked @llvm.x86.avx512.mask.add.sd.round (passthru = zeroinitializer)
; with the second operand folded from memory, rounding argument i32 4.
; MSan must check the shadow of the pointer %a1 before the scalar load, fetch
; the loaded value's shadow from app-addr XOR 0x7fff8000, thread it through the
; insertelement chain, and OR all operand shadows (including the i8 mask) into
; a single pre-call check. NOTE: CHECK lines are autogenerated; regenerate with
; update_test_checks.py rather than editing by hand.
4759define <2 x double> @test_maskz_add_sd_current_memfold(<2 x double> %a0, ptr %a1, i8 %mask, <2 x double> %extra_param) #0 {
4760; CHECK-LABEL: @test_maskz_add_sd_current_memfold(
4761; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4762; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
4763; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
4764; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
4765; CHECK-NEXT:    call void @llvm.donothing()
4766; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4767; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
4768; CHECK:       5:
4769; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4770; CHECK-NEXT:    unreachable
4771; CHECK:       6:
4772; CHECK-NEXT:    [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
4773; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
4774; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
4775; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
4776; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
4777; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[_MSLD]], i32 0
4778; CHECK-NEXT:    [[A1V0:%.*]] = insertelement <2 x double> [[EXTRA_PARAM:%.*]], double [[A1_VAL]], i32 0
4779; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
4780; CHECK-NEXT:    [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
4781; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
4782; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
4783; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
4784; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP10]], 0
4785; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
4786; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP3]], 0
4787; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
4788; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
4789; CHECK:       12:
4790; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4791; CHECK-NEXT:    unreachable
4792; CHECK:       13:
4793; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4)
4794; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
4795; CHECK-NEXT:    ret <2 x double> [[RES]]
4796;
4797  %a1.val = load double, ptr %a1
4798  %a1v0 = insertelement <2 x double> %extra_param, double %a1.val, i32 0
4799  %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
4800  %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4)
4801  ret <2 x double> %res
4802}
4803
4804declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
4805
; Masked @llvm.x86.avx512.mask.max.ss.round with SAE (rounding argument i32 8)
; and a passthru operand %a2. All four argument shadows are loaded from
; __msan_param_tls (offsets 0/16/32/48), OR-combined, and guarded by one
; branch to __msan_warning_noreturn before the intrinsic call; the return
; shadow is cleared to zero. CHECK lines are autogenerated.
4806define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
4807; CHECK-LABEL: @test_mask_max_ss_sae(
4808; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
4809; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4810; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
4811; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
4812; CHECK-NEXT:    call void @llvm.donothing()
4813; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4814; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
4815; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4816; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
4817; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4818; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4819; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
4820; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
4821; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
4822; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
4823; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4824; CHECK:       8:
4825; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4826; CHECK-NEXT:    unreachable
4827; CHECK:       9:
4828; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
4829; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4830; CHECK-NEXT:    ret <4 x float> [[RES]]
4831;
4832  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
4833  ret <4 x float> %res
4834}
4835
; Zero-masked variant of max.ss.round with SAE (i32 8): passthru is
; zeroinitializer, so only three shadows (a0, a1, mask) are loaded and
; OR-combined before the guarded intrinsic call. CHECK lines are autogenerated.
4836define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
4837; CHECK-LABEL: @test_maskz_max_ss_sae(
4838; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
4839; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4840; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
4841; CHECK-NEXT:    call void @llvm.donothing()
4842; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4843; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
4844; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4845; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
4846; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4847; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
4848; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
4849; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
4850; CHECK:       6:
4851; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4852; CHECK-NEXT:    unreachable
4853; CHECK:       7:
4854; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8)
4855; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4856; CHECK-NEXT:    ret <4 x float> [[RES]]
4857;
4858  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
4859  ret <4 x float> %res
4860}
4861
; Unmasked max.ss.round with SAE (mask constant i8 -1, rounding i32 8): only
; the two vector argument shadows are checked; the constant mask contributes
; no shadow. CHECK lines are autogenerated.
4862define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) #0 {
4863; CHECK-LABEL: @test_max_ss_sae(
4864; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
4865; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4866; CHECK-NEXT:    call void @llvm.donothing()
4867; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4868; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
4869; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4870; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
4871; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4872; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
4873; CHECK:       5:
4874; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4875; CHECK-NEXT:    unreachable
4876; CHECK:       6:
4877; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8)
4878; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4879; CHECK-NEXT:    ret <4 x float> [[RES]]
4880;
4881  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
4882  ret <4 x float> %res
4883}
4884
; Masked max.ss.round with default rounding (i32 4) and passthru %a2; same
; four-shadow OR-and-guard pattern as the SAE variant above, differing only
; in the rounding immediate. CHECK lines are autogenerated.
4885define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
4886; CHECK-LABEL: @test_mask_max_ss(
4887; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
4888; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4889; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
4890; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
4891; CHECK-NEXT:    call void @llvm.donothing()
4892; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4893; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
4894; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4895; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
4896; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4897; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4898; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
4899; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
4900; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
4901; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
4902; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4903; CHECK:       8:
4904; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4905; CHECK-NEXT:    unreachable
4906; CHECK:       9:
4907; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
4908; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4909; CHECK-NEXT:    ret <4 x float> [[RES]]
4910;
4911  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
4912  ret <4 x float> %res
4913}
4914
; Zero-masked max.ss.round with default rounding (i32 4): three shadows
; (a0, a1, mask) are OR-combined and guarded before the call. CHECK lines
; are autogenerated.
4915define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
4916; CHECK-LABEL: @test_maskz_max_ss(
4917; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
4918; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4919; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
4920; CHECK-NEXT:    call void @llvm.donothing()
4921; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4922; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
4923; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4924; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
4925; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4926; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
4927; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
4928; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
4929; CHECK:       6:
4930; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4931; CHECK-NEXT:    unreachable
4932; CHECK:       7:
4933; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4)
4934; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4935; CHECK-NEXT:    ret <4 x float> [[RES]]
4936;
4937  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
4938  ret <4 x float> %res
4939}
4940
; Unmasked max.ss.round with default rounding (mask i8 -1, rounding i32 4):
; only the two vector argument shadows are checked. CHECK lines are
; autogenerated.
4941define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) #0 {
4942; CHECK-LABEL: @test_max_ss(
4943; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
4944; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4945; CHECK-NEXT:    call void @llvm.donothing()
4946; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
4947; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
4948; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4949; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
4950; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
4951; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
4952; CHECK:       5:
4953; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4954; CHECK-NEXT:    unreachable
4955; CHECK:       6:
4956; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 4)
4957; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
4958; CHECK-NEXT:    ret <4 x float> [[RES]]
4959;
4960  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
4961  ret <4 x float> %res
4962}
4963
; Masked max.ss.round with the second operand folded from memory: a scalar
; float is loaded from %a1 (pointer shadow checked first), inserted into lane 0
; of %extra_param, and lanes 1-3 are filled with constant zeros. MSan threads
; the loaded shadow through the four insertelements (_MSPROP..3), then ORs the
; a0/a1v/a2/mask shadows into one guarded check. CHECK lines are autogenerated.
4964define <4 x float> @test_mask_max_ss_memfold(<4 x float> %a0, ptr %a1, <4 x float> %a2, i8 %mask, <4 x float> %extra_param) #0 {
4965; CHECK-LABEL: @test_mask_max_ss_memfold(
4966; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
4967; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
4968; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
4969; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
4970; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
4971; CHECK-NEXT:    call void @llvm.donothing()
4972; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4973; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
4974; CHECK:       6:
4975; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
4976; CHECK-NEXT:    unreachable
4977; CHECK:       7:
4978; CHECK-NEXT:    [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
4979; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
4980; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
4981; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
4982; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
4983; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[_MSLD]], i32 0
4984; CHECK-NEXT:    [[A1V0:%.*]] = insertelement <4 x float> [[EXTRA_PARAM:%.*]], float [[A1_VAL]], i32 0
4985; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
4986; CHECK-NEXT:    [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
4987; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
4988; CHECK-NEXT:    [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
4989; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
4990; CHECK-NEXT:    [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
4991; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
4992; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP10]], 0
4993; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
4994; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i128 [[TMP11]], 0
4995; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
4996; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
4997; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP12]], 0
4998; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
4999; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i8 [[TMP4]], 0
5000; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
5001; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
5002; CHECK:       14:
5003; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5004; CHECK-NEXT:    unreachable
5005; CHECK:       15:
5006; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
5007; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
5008; CHECK-NEXT:    ret <4 x float> [[RES]]
5009;
5010  %a1.val = load float, ptr %a1
5011  %a1v0 = insertelement <4 x float> %extra_param, float %a1.val, i32 0
5012  %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
5013  %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
5014  %a1v  = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
5015  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4)
5016  ret <4 x float> %res
5017}
5018
; Zero-masked variant of the memory-folded max.ss.round test above: same
; pointer-shadow pre-check, shadow load, and insertelement propagation, but
; the passthru is zeroinitializer so only a0/a1v/mask shadows feed the final
; guarded check. CHECK lines are autogenerated.
5019define <4 x float> @test_maskz_max_ss_memfold(<4 x float> %a0, ptr %a1, i8 %mask, <4 x float> %extra_param) #0 {
5020; CHECK-LABEL: @test_maskz_max_ss_memfold(
5021; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5022; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
5023; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
5024; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
5025; CHECK-NEXT:    call void @llvm.donothing()
5026; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5027; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
5028; CHECK:       5:
5029; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5030; CHECK-NEXT:    unreachable
5031; CHECK:       6:
5032; CHECK-NEXT:    [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
5033; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
5034; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
5035; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
5036; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
5037; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[_MSLD]], i32 0
5038; CHECK-NEXT:    [[A1V0:%.*]] = insertelement <4 x float> [[EXTRA_PARAM:%.*]], float [[A1_VAL]], i32 0
5039; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
5040; CHECK-NEXT:    [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
5041; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
5042; CHECK-NEXT:    [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
5043; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
5044; CHECK-NEXT:    [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
5045; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
5046; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
5047; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
5048; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i128 [[TMP10]], 0
5049; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
5050; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i8 [[TMP3]], 0
5051; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
5052; CHECK-NEXT:    br i1 [[_MSOR7]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
5053; CHECK:       12:
5054; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5055; CHECK-NEXT:    unreachable
5056; CHECK:       13:
5057; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4)
5058; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
5059; CHECK-NEXT:    ret <4 x float> [[RES]]
5060;
5061  %a1.val = load float, ptr %a1
5062  %a1v0 = insertelement <4 x float> %extra_param, float %a1.val, i32 0
5063  %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
5064  %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
5065  %a1v  = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
5066  %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4)
5067  ret <4 x float> %res
5068}
5069declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
5070
; Masked @llvm.x86.avx512.mask.max.sd.round with SAE (i32 8) and passthru %a2;
; double-precision counterpart of test_mask_max_ss_sae with <2 x i64> shadows.
; CHECK lines are autogenerated.
5071define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
5072; CHECK-LABEL: @test_mask_max_sd_sae(
5073; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
5074; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5075; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
5076; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
5077; CHECK-NEXT:    call void @llvm.donothing()
5078; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5079; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
5080; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5081; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
5082; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5083; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5084; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
5085; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
5086; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
5087; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
5088; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5089; CHECK:       8:
5090; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5091; CHECK-NEXT:    unreachable
5092; CHECK:       9:
5093; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
5094; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
5095; CHECK-NEXT:    ret <2 x double> [[RES]]
5096;
5097  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
5098  ret <2 x double> %res
5099}
5100
; Zero-masked max.sd.round with SAE (i32 8): a0/a1/mask shadows are
; OR-combined and guarded before the call. CHECK lines are autogenerated.
5101define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
5102; CHECK-LABEL: @test_maskz_max_sd_sae(
5103; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
5104; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5105; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
5106; CHECK-NEXT:    call void @llvm.donothing()
5107; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5108; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
5109; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5110; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
5111; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5112; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
5113; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
5114; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
5115; CHECK:       6:
5116; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5117; CHECK-NEXT:    unreachable
5118; CHECK:       7:
5119; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8)
5120; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
5121; CHECK-NEXT:    ret <2 x double> [[RES]]
5122;
5123  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
5124  ret <2 x double> %res
5125}
5126
; Unmasked max.sd.round with SAE (mask i8 -1, rounding i32 8): only the two
; vector argument shadows are checked. CHECK lines are autogenerated.
5127define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) #0 {
5128; CHECK-LABEL: @test_max_sd_sae(
5129; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
5130; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5131; CHECK-NEXT:    call void @llvm.donothing()
5132; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5133; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
5134; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5135; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
5136; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5137; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
5138; CHECK:       5:
5139; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5140; CHECK-NEXT:    unreachable
5141; CHECK:       6:
5142; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 8)
5143; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
5144; CHECK-NEXT:    ret <2 x double> [[RES]]
5145;
5146  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
5147  ret <2 x double> %res
5148}
5149
; Masked max.sd.round with default rounding (i32 4) and passthru %a2; same
; four-shadow OR-and-guard pattern as the SAE variant, differing only in the
; rounding immediate. CHECK lines are autogenerated.
5150define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
5151; CHECK-LABEL: @test_mask_max_sd(
5152; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
5153; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5154; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
5155; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
5156; CHECK-NEXT:    call void @llvm.donothing()
5157; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5158; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
5159; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5160; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
5161; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5162; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5163; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
5164; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
5165; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
5166; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
5167; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5168; CHECK:       8:
5169; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5170; CHECK-NEXT:    unreachable
5171; CHECK:       9:
5172; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
5173; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
5174; CHECK-NEXT:    ret <2 x double> [[RES]]
5175;
5176  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
5177  ret <2 x double> %res
5178}
5179
; Zero-masked max.sd.round with default rounding (i32 4): a0/a1/mask shadows
; are OR-combined and guarded before the call. CHECK lines are autogenerated.
5180define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
5181; CHECK-LABEL: @test_maskz_max_sd(
5182; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
5183; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5184; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
5185; CHECK-NEXT:    call void @llvm.donothing()
5186; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5187; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
5188; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5189; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
5190; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5191; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
5192; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
5193; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
5194; CHECK:       6:
5195; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5196; CHECK-NEXT:    unreachable
5197; CHECK:       7:
5198; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4)
5199; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
5200; CHECK-NEXT:    ret <2 x double> [[RES]]
5201;
5202  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
5203  ret <2 x double> %res
5204}
5205
; Unmasked max.sd.round with default rounding (mask i8 -1, rounding i32 4):
; only the two vector argument shadows are checked. CHECK lines are
; autogenerated.
5206define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) #0 {
5207; CHECK-LABEL: @test_max_sd(
5208; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
5209; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5210; CHECK-NEXT:    call void @llvm.donothing()
5211; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
5212; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
5213; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5214; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
5215; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5216; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
5217; CHECK:       5:
5218; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5219; CHECK-NEXT:    unreachable
5220; CHECK:       6:
5221; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 4)
5222; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
5223; CHECK-NEXT:    ret <2 x double> [[RES]]
5224;
5225  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
5226  ret <2 x double> %res
5227}
5228
; Masked max.sd.round with a memory operand: MSan first checks the shadow of
; the pointer %a1 before the scalar load, derives the loaded double's shadow
; through the x86-64 shadow mapping (address XOR 87960930222080), propagates
; it through both insertelements (lane 1 shadow forced to 0 by the constant
; insert), then strictly checks the shadows of %a0, the assembled %a1v, %a2
; and the i8 %mask before the intrinsic. Return shadow is zero.
5229define <2 x double> @test_mask_max_sd_memfold(<2 x double> %a0, ptr %a1, <2 x double> %a2, i8 %mask, <2 x double> %extra_param) #0 {
5230; CHECK-LABEL: @test_mask_max_sd_memfold(
5231; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5232; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
5233; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
5234; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
5235; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
5236; CHECK-NEXT:    call void @llvm.donothing()
5237; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5238; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
5239; CHECK:       6:
5240; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5241; CHECK-NEXT:    unreachable
5242; CHECK:       7:
5243; CHECK-NEXT:    [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
5244; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
5245; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
5246; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
5247; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8
5248; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[_MSLD]], i32 0
5249; CHECK-NEXT:    [[A1V0:%.*]] = insertelement <2 x double> [[EXTRA_PARAM:%.*]], double [[A1_VAL]], i32 0
5250; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
5251; CHECK-NEXT:    [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
5252; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5253; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0
5254; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
5255; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP11]], 0
5256; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
5257; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
5258; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP12]], 0
5259; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
5260; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i8 [[TMP4]], 0
5261; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
5262; CHECK-NEXT:    br i1 [[_MSOR7]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
5263; CHECK:       14:
5264; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5265; CHECK-NEXT:    unreachable
5266; CHECK:       15:
5267; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
5268; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
5269; CHECK-NEXT:    ret <2 x double> [[RES]]
5270;
5271  %a1.val = load double, ptr %a1
5272  %a1v0 = insertelement <2 x double> %extra_param, double %a1.val, i32 0
5273  %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
5274  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4)
5275  ret <2 x double> %res
5276}
5277
; Zero-masked variant of the memfold test above (passthru is zeroinitializer,
; so there is no <2 x double> %a2 parameter): same pointer-shadow check before
; the load, same shadow-mapping load and insertelement propagation, then
; strict checks on %a0, %a1v and the i8 %mask shadows. Return shadow is zero.
5278define <2 x double> @test_maskz_max_sd_memfold(<2 x double> %a0, ptr %a1, i8 %mask, <2 x double> %extra_param) #0 {
5279; CHECK-LABEL: @test_maskz_max_sd_memfold(
5280; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5281; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
5282; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
5283; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
5284; CHECK-NEXT:    call void @llvm.donothing()
5285; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5286; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
5287; CHECK:       5:
5288; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5289; CHECK-NEXT:    unreachable
5290; CHECK:       6:
5291; CHECK-NEXT:    [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
5292; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
5293; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
5294; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
5295; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
5296; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[_MSLD]], i32 0
5297; CHECK-NEXT:    [[A1V0:%.*]] = insertelement <2 x double> [[EXTRA_PARAM:%.*]], double [[A1_VAL]], i32 0
5298; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
5299; CHECK-NEXT:    [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
5300; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
5301; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
5302; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
5303; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP10]], 0
5304; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
5305; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP3]], 0
5306; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
5307; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
5308; CHECK:       12:
5309; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5310; CHECK-NEXT:    unreachable
5311; CHECK:       13:
5312; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4)
5313; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
5314; CHECK-NEXT:    ret <2 x double> [[RES]]
5315;
5316  %a1.val = load double, ptr %a1
5317  %a1v0 = insertelement <2 x double> %extra_param, double %a1.val, i32 0
5318  %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
5319  %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4)
5320  ret <2 x double> %res
5321}
5322
; cvtsi2ss32 (signed i32 -> ss with rounding mode 11): MSan strictly checks
; the <4 x float> shadow (as i128) and the i32 shadow, OR-ing the two checks
; into one warning branch; the return shadow is all zeros.
5323define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) #0 {
5324; CHECK-LABEL: @test_x86_avx512_cvtsi2ss32(
5325; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
5326; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5327; CHECK-NEXT:    call void @llvm.donothing()
5328; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
5329; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
5330; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
5331; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5332; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
5333; CHECK:       4:
5334; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5335; CHECK-NEXT:    unreachable
5336; CHECK:       5:
5337; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 11)
5338; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
5339; CHECK-NEXT:    ret <4 x float> [[RES]]
5340;
5341  %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 11) ; <<<4 x float>> [#uses=1]
5342  ret <4 x float> %res
5343}
5344declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
5345
; cvtusi2ss (unsigned i32 -> ss, rounding mode 9): only the i32 %b shadow is
; strictly checked here; the vector %a shadow is instead propagated to the
; return shadow with lane 0 zeroed (insertelement i32 0 at index 0), since
; the intrinsic only overwrites element 0 of %a.
5346define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b) #0 {
5347; CHECK-LABEL: @test_x86_avx512__mm_cvt_roundu32_ss(
5348; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5349; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
5350; CHECK-NEXT:    call void @llvm.donothing()
5351; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
5352; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
5353; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
5354; CHECK:       4:
5355; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5356; CHECK-NEXT:    unreachable
5357; CHECK:       5:
5358; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 9)
5359; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
5360; CHECK-NEXT:    ret <4 x float> [[RES]]
5361;
5362  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<<4 x float>> [#uses=1]
5363  ret <4 x float> %res
5364}
5365
; Memory-operand version of cvt_roundu32_ss: the pointer shadow is checked
; before the i32 load, the loaded value's shadow is read via the shadow
; mapping (XOR 87960930222080) and strictly checked before the intrinsic;
; the return shadow is %a's shadow with lane 0 cleared.
5366define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, ptr %ptr) #0 {
5367; CHECK-LABEL: @test_x86_avx512__mm_cvt_roundu32_ss_mem(
5368; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5369; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
5370; CHECK-NEXT:    call void @llvm.donothing()
5371; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5372; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
5373; CHECK:       3:
5374; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5375; CHECK-NEXT:    unreachable
5376; CHECK:       4:
5377; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[PTR:%.*]], align 4
5378; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
5379; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
5380; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
5381; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4
5382; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
5383; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSLD]], 0
5384; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
5385; CHECK:       9:
5386; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5387; CHECK-NEXT:    unreachable
5388; CHECK:       10:
5389; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B]], i32 9)
5390; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
5391; CHECK-NEXT:    ret <4 x float> [[RES]]
5392;
5393  %b = load i32, ptr %ptr
5394  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<<4 x float>> [#uses=1]
5395  ret <4 x float> %res
5396}
5397
; Same as cvt_roundu32_ss but with the current-rounding-mode immediate (4):
; i32 %b shadow strictly checked; return shadow is %a's shadow with lane 0
; zeroed.
5398define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b) #0 {
5399; CHECK-LABEL: @test_x86_avx512__mm_cvtu32_ss(
5400; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5401; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
5402; CHECK-NEXT:    call void @llvm.donothing()
5403; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
5404; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
5405; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
5406; CHECK:       4:
5407; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5408; CHECK-NEXT:    unreachable
5409; CHECK:       5:
5410; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 4)
5411; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
5412; CHECK-NEXT:    ret <4 x float> [[RES]]
5413;
5414  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
5415  ret <4 x float> %res
5416}
5417
; Memory-operand cvtusi2ss with rounding mode 4: pointer shadow checked
; before the load, loaded i32 shadow checked before the intrinsic, return
; shadow is %a's shadow with lane 0 cleared (mirrors the roundu32 mem test).
5418define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, ptr %ptr) #0 {
5419; CHECK-LABEL: @test_x86_avx512__mm_cvtu32_ss_mem(
5420; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
5421; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
5422; CHECK-NEXT:    call void @llvm.donothing()
5423; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5424; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
5425; CHECK:       3:
5426; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5427; CHECK-NEXT:    unreachable
5428; CHECK:       4:
5429; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[PTR:%.*]], align 4
5430; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
5431; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
5432; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
5433; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4
5434; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
5435; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[_MSLD]], 0
5436; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
5437; CHECK:       9:
5438; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5439; CHECK-NEXT:    unreachable
5440; CHECK:       10:
5441; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B]], i32 4)
5442; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
5443; CHECK-NEXT:    ret <4 x float> [[RES]]
5444;
5445  %b = load i32, ptr %ptr
5446  %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
5447  ret <4 x float> %res
5448}
5449declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
5450
5451declare <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>)
5452
; Integer vpermi2var with a memory operand: only the pointer shadow needs a
; strict check; the three <16 x i32> operand shadows are OR-combined into the
; result shadow (no warning branch for them, since the integer result's shadow
; can be propagated instead of checked).
5453define <16 x i32>@test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p) #0 {
5454; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_d_512(
5455; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5456; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
5457; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5458; CHECK-NEXT:    call void @llvm.donothing()
5459; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5460; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
5461; CHECK:       4:
5462; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5463; CHECK-NEXT:    unreachable
5464; CHECK:       5:
5465; CHECK-NEXT:    [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
5466; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[X2P]] to i64
5467; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
5468; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
5469; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64
5470; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
5471; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]]
5472; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]])
5473; CHECK-NEXT:    store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
5474; CHECK-NEXT:    ret <16 x i32> [[TMP9]]
5475;
5476  %x2 = load <16 x i32>, ptr %x2p
5477  %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
5478  ret <16 x i32> %1
5479}
5480
; Masked integer vpermi2var: the permute's OR-propagated shadow feeds a
; select-based propagation for the IR-level `select <16 x i1>` merge — shadow
; is selected per-lane by the real mask, and lanes where the mask shadow
; (TMP11) is poisoned get the pessimistic xor/or combination (TMP14-TMP16).
5481define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, i16 %x3) #0 {
5482; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512(
5483; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5484; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
5485; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5486; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
5487; CHECK-NEXT:    call void @llvm.donothing()
5488; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5489; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
5490; CHECK:       5:
5491; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5492; CHECK-NEXT:    unreachable
5493; CHECK:       6:
5494; CHECK-NEXT:    [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
5495; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
5496; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
5497; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
5498; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
5499; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
5500; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]]
5501; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]])
5502; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
5503; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
5504; CHECK-NEXT:    [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]]
5505; CHECK-NEXT:    [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[X1]]
5506; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP1]]
5507; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP3]]
5508; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
5509; CHECK-NEXT:    [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[X1]]
5510; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
5511; CHECK-NEXT:    ret <16 x i32> [[TMP17]]
5512;
5513  %x2 = load <16 x i32>, ptr %x2p
5514  %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
5515  %2 = bitcast i16 %x3 to <16 x i1>
5516  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
5517  ret <16 x i32> %3
5518}
5519
5520declare <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>)
5521
; FP (pd) vpermi2var: unlike the integer `.d.512` test above, all three
; operand shadows are strictly checked (bitcast to i512, icmp/or chain) with
; a warning branch, and the return shadow is all zeros.
5522define <8 x double>@test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) #0 {
5523; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_pd_512(
5524; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
5525; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5526; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5527; CHECK-NEXT:    call void @llvm.donothing()
5528; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
5529; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
5530; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
5531; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
5532; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5533; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
5534; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
5535; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
5536; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
5537; CHECK:       7:
5538; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5539; CHECK-NEXT:    unreachable
5540; CHECK:       8:
5541; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]])
5542; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
5543; CHECK-NEXT:    ret <8 x double> [[TMP9]]
5544;
5545  %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2)
5546  ret <8 x double> %1
5547}
5548
; Masked pd vpermi2var: operand shadows are strictly checked before the
; intrinsic (whose own shadow is then zero), and the IR-level select merge
; gets select-based shadow propagation over the bitcast-to-<8 x i64> values,
; mixing in the passthru shadow TMP2 and the mask shadow TMP12.
5549define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 {
5550; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512(
5551; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
5552; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5553; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5554; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
5555; CHECK-NEXT:    call void @llvm.donothing()
5556; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
5557; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
5558; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
5559; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
5560; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5561; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
5562; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
5563; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
5564; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5565; CHECK:       8:
5566; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5567; CHECK-NEXT:    unreachable
5568; CHECK:       9:
5569; CHECK-NEXT:    [[TMP10:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]])
5570; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double>
5571; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
5572; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
5573; CHECK-NEXT:    [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> zeroinitializer, <8 x i64> [[TMP2]]
5574; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <8 x double> [[TMP10]] to <8 x i64>
5575; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <8 x double> [[TMP11]] to <8 x i64>
5576; CHECK-NEXT:    [[TMP17:%.*]] = xor <8 x i64> [[TMP15]], [[TMP16]]
5577; CHECK-NEXT:    [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer
5578; CHECK-NEXT:    [[TMP19:%.*]] = or <8 x i64> [[TMP18]], [[TMP2]]
5579; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP19]], <8 x i64> [[TMP14]]
5580; CHECK-NEXT:    [[TMP20:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP10]], <8 x double> [[TMP11]]
5581; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
5582; CHECK-NEXT:    ret <8 x double> [[TMP20]]
5583;
5584  %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2)
5585  %2 = bitcast <8 x i64> %x1 to <8 x double>
5586  %3 = bitcast i8 %x3 to <8 x i1>
5587  %4 = select <8 x i1> %3, <8 x double> %1, <8 x double> %2
5588  ret <8 x double> %4
5589}
5590
5591declare <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>)
5592
; FP (ps) vpermi2var: same strict-check shape as the pd test — all three
; operand shadows checked as i512 with one OR-ed warning branch, return
; shadow zero.
5593define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) #0 {
5594; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_ps_512(
5595; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
5596; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5597; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5598; CHECK-NEXT:    call void @llvm.donothing()
5599; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
5600; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
5601; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
5602; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
5603; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5604; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
5605; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
5606; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
5607; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
5608; CHECK:       7:
5609; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5610; CHECK-NEXT:    unreachable
5611; CHECK:       8:
5612; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]])
5613; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
5614; CHECK-NEXT:    ret <16 x float> [[TMP9]]
5615;
5616  %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2)
5617  ret <16 x float> %1
5618}
5619
; Masked ps vpermi2var: <16 x float>/<16 x i32> analogue of the masked pd
; test — strict operand-shadow checks, then select-based shadow propagation
; for the mask merge over the bitcast-to-<16 x i32> values.
5620define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 {
5621; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512(
5622; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
5623; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5624; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5625; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
5626; CHECK-NEXT:    call void @llvm.donothing()
5627; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
5628; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
5629; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
5630; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
5631; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5632; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
5633; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
5634; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
5635; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5636; CHECK:       8:
5637; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5638; CHECK-NEXT:    unreachable
5639; CHECK:       9:
5640; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]])
5641; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float>
5642; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
5643; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
5644; CHECK-NEXT:    [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> zeroinitializer, <16 x i32> [[TMP2]]
5645; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32>
5646; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <16 x float> [[TMP11]] to <16 x i32>
5647; CHECK-NEXT:    [[TMP17:%.*]] = xor <16 x i32> [[TMP15]], [[TMP16]]
5648; CHECK-NEXT:    [[TMP18:%.*]] = or <16 x i32> [[TMP17]], zeroinitializer
5649; CHECK-NEXT:    [[TMP19:%.*]] = or <16 x i32> [[TMP18]], [[TMP2]]
5650; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP19]], <16 x i32> [[TMP14]]
5651; CHECK-NEXT:    [[TMP20:%.*]] = select <16 x i1> [[TMP13]], <16 x float> [[TMP10]], <16 x float> [[TMP11]]
5652; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
5653; CHECK-NEXT:    ret <16 x float> [[TMP20]]
5654;
5655  %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2)
5656  %2 = bitcast <16 x i32> %x1 to <16 x float>
5657  %3 = bitcast i16 %x3 to <16 x i1>
5658  %4 = select <16 x i1> %3, <16 x float> %1, <16 x float> %2
5659  ret <16 x float> %4
5660}
5661
5662declare <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>)
5663
; Integer (q) vpermi2var with all-register operands: no warning branches at
; all — the three <8 x i64> operand shadows are simply OR-combined and stored
; as the return shadow.
5664define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
5665; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_q_512(
5666; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
5667; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5668; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5669; CHECK-NEXT:    call void @llvm.donothing()
5670; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
5671; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
5672; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]])
5673; CHECK-NEXT:    store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
5674; CHECK-NEXT:    ret <8 x i64> [[TMP4]]
5675;
5676  %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
5677  ret <8 x i64> %1
5678}
5679
; Masked q vpermi2var: OR-propagated permute shadow, then select-based shadow
; propagation for the mask merge (same pattern as the masked d test, with
; <8 x i64> lanes and an i8 mask).
5680define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
5681; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512(
5682; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
5683; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5684; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5685; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
5686; CHECK-NEXT:    call void @llvm.donothing()
5687; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
5688; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
5689; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]])
5690; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
5691; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
5692; CHECK-NEXT:    [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP2]]
5693; CHECK-NEXT:    [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X1]]
5694; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
5695; CHECK-NEXT:    [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP2]]
5696; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
5697; CHECK-NEXT:    [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X1]]
5698; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
5699; CHECK-NEXT:    ret <8 x i64> [[TMP12]]
5700;
5701  %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
5702  %2 = bitcast i8 %x3 to <8 x i1>
5703  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x1
5704  ret <8 x i64> %3
5705}
5706
; Zero-masked vpermt2var.d.512 with a memory operand: the pointer shadow is
; checked (branch to __msan_warning_noreturn when %x2p's shadow is nonzero),
; the loaded value's shadow is fetched from the app-to-shadow mapping
; (xor with 0x500000000000), then OR-propagated into the select's shadow.
5707define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, i16 %x3) #0 {
5708; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512(
5709; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5710; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5711; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
5712; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
5713; CHECK-NEXT:    call void @llvm.donothing()
5714; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5715; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
5716; CHECK:       5:
5717; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5718; CHECK-NEXT:    unreachable
5719; CHECK:       6:
5720; CHECK-NEXT:    [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
5721; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
5722; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
5723; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
5724; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
5725; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
5726; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]]
5727; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2]])
5728; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
5729; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
5730; CHECK-NEXT:    [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
5731; CHECK-NEXT:    [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
5732; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP1]]
5733; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
5734; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
5735; CHECK-NEXT:    [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
5736; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
5737; CHECK-NEXT:    ret <16 x i32> [[TMP17]]
5738;
5739  %x2 = load <16 x i32>, ptr %x2p
5740  %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
5741  %2 = bitcast i16 %x3 to <16 x i1>
5742  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
5743  ret <16 x i32> %3
5744}
5745
; Zero-masked FP vpermt2var.pd.512 built from a scalar load splatted via
; insertelement + shufflevector. Because the intrinsic operands are FP,
; this variant uses a strict check (icmp/or on bitcast-to-i512 shadows and a
; conditional __msan_warning_noreturn) instead of OR-propagation; the
; post-call shadow terms are zeroinitializer since the checked shadows were
; reported, not propagated.
5746define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, ptr %x2ptr, i8 %x3, <8 x double> %extra_param, <8 x double> %extra_param2) #0 {
5747; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512(
5748; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5749; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
5750; CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 208) to ptr), align 8
5751; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5752; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
5753; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
5754; CHECK-NEXT:    call void @llvm.donothing()
5755; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5756; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
5757; CHECK:       7:
5758; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5759; CHECK-NEXT:    unreachable
5760; CHECK:       8:
5761; CHECK-NEXT:    [[X2S:%.*]] = load double, ptr [[X2PTR:%.*]], align 8
5762; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[X2PTR]] to i64
5763; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
5764; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
5765; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8
5766; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <8 x i64> [[TMP5]], i64 [[_MSLD]], i32 0
5767; CHECK-NEXT:    [[X2INS:%.*]] = insertelement <8 x double> [[EXTRA_PARAM:%.*]], double [[X2S]], i32 0
5768; CHECK-NEXT:    [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> [[TMP6]], <8 x i32> zeroinitializer
5769; CHECK-NEXT:    [[X2:%.*]] = shufflevector <8 x double> [[X2INS]], <8 x double> [[EXTRA_PARAM2:%.*]], <8 x i32> zeroinitializer
5770; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
5771; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
5772; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
5773; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
5774; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
5775; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to i512
5776; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP12]], 0
5777; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
5778; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]]
5779; CHECK:       15:
5780; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5781; CHECK-NEXT:    unreachable
5782; CHECK:       16:
5783; CHECK-NEXT:    [[TMP15:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x double> [[X2]])
5784; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
5785; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
5786; CHECK-NEXT:    [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
5787; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <8 x double> [[TMP15]] to <8 x i64>
5788; CHECK-NEXT:    [[TMP20:%.*]] = xor <8 x i64> [[TMP19]], zeroinitializer
5789; CHECK-NEXT:    [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer
5790; CHECK-NEXT:    [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer
5791; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP22]], <8 x i64> [[TMP18]]
5792; CHECK-NEXT:    [[TMP23:%.*]] = select <8 x i1> [[TMP17]], <8 x double> [[TMP15]], <8 x double> zeroinitializer
5793; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
5794; CHECK-NEXT:    ret <8 x double> [[TMP23]]
5795;
5796  %x2s = load double, ptr %x2ptr
5797  %x2ins = insertelement <8 x double> %extra_param, double %x2s, i32 0
5798  %x2 = shufflevector <8 x double> %x2ins, <8 x double> %extra_param2, <8 x i32> zeroinitializer
5799  %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x1, <8 x i64> %x0, <8 x double> %x2)
5800  %2 = bitcast i8 %x3 to <8 x i1>
5801  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
5802  ret <8 x double> %3
5803}
5804
; Zero-masked FP vpermt2var.ps.512: like the pd variant, operand shadows are
; checked strictly (bitcast-to-i512 + icmp + warning branch), so the shadow
; terms after the call are zeroinitializer and only the mask shadow (TMP11)
; feeds the select's shadow.
5805define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) #0 {
5806; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512(
5807; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5808; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
5809; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5810; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
5811; CHECK-NEXT:    call void @llvm.donothing()
5812; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
5813; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
5814; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
5815; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
5816; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5817; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
5818; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
5819; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
5820; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5821; CHECK:       8:
5822; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5823; CHECK-NEXT:    unreachable
5824; CHECK:       9:
5825; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x float> [[X2:%.*]])
5826; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
5827; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
5828; CHECK-NEXT:    [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
5829; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32>
5830; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP14]], zeroinitializer
5831; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
5832; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], zeroinitializer
5833; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP17]], <16 x i32> [[TMP13]]
5834; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP12]], <16 x float> [[TMP10]], <16 x float> zeroinitializer
5835; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
5836; CHECK-NEXT:    ret <16 x float> [[TMP18]]
5837;
5838  %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x1, <16 x i32> %x0, <16 x float> %x2)
5839  %2 = bitcast i16 %x3 to <16 x i1>
5840  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
5841  ret <16 x float> %3
5842}
5843
; Zero-masked integer vpermt2var.q.512: operand shadows are OR-propagated
; (_MSPROP1) rather than checked, then merged with the mask shadow through
; the usual select-shadow pattern with zeroinitializer as the false arm.
5844define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
5845; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512(
5846; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5847; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
5848; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5849; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
5850; CHECK-NEXT:    call void @llvm.donothing()
5851; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
5852; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
5853; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x i64> [[X2:%.*]])
5854; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
5855; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
5856; CHECK-NEXT:    [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer
5857; CHECK-NEXT:    [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], zeroinitializer
5858; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
5859; CHECK-NEXT:    [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer
5860; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
5861; CHECK-NEXT:    [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> zeroinitializer
5862; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
5863; CHECK-NEXT:    ret <8 x i64> [[TMP12]]
5864;
5865  %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
5866  %2 = bitcast i8 %x3 to <8 x i1>
5867  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
5868  ret <8 x i64> %3
5869}
5870
; Unmasked vpermt2var.d.512 (lowered onto the vpermi2var.d.512 intrinsic with
; swapped x0/x1): result shadow is the OR of the three operand shadows.
5871define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 {
5872; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_d_512(
5873; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5874; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
5875; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5876; CHECK-NEXT:    call void @llvm.donothing()
5877; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
5878; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]]
5879; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]])
5880; CHECK-NEXT:    store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
5881; CHECK-NEXT:    ret <16 x i32> [[TMP4]]
5882;
5883  %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
5884  ret <16 x i32> %1
5885}
5886
; Masked vpermt2var.d.512 with %x1 as passthru: select shadow uses the
; passthru's shadow (TMP1) in the false arm and xor(result, %x1) to model
; potential divergence between the two select arms.
5887define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
5888; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512(
5889; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5890; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
5891; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5892; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
5893; CHECK-NEXT:    call void @llvm.donothing()
5894; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
5895; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]]
5896; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]])
5897; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
5898; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
5899; CHECK-NEXT:    [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP1]]
5900; CHECK-NEXT:    [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X1]]
5901; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP1]]
5902; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP1]]
5903; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
5904; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X1]]
5905; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
5906; CHECK-NEXT:    ret <16 x i32> [[TMP12]]
5907;
5908  %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
5909  %2 = bitcast i16 %x3 to <16 x i1>
5910  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
5911  ret <16 x i32> %3
5912}
5913
; mask.scalef.pd.512 (FP, with embedded rounding): both calls are handled
; strictly — every vector operand shadow is bitcast to i512 and checked, with
; the i8 mask shadow additionally checked before the first (masked) call.
; The return shadow is cleared to zeroinitializer.
5914declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
5915define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) #0 {
5916; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512(
5917; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
5918; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5919; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5920; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
5921; CHECK-NEXT:    call void @llvm.donothing()
5922; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
5923; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
5924; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
5925; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
5926; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5927; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
5928; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
5929; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
5930; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
5931; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
5932; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5933; CHECK:       8:
5934; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5935; CHECK-NEXT:    unreachable
5936; CHECK:       9:
5937; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 11)
5938; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
5939; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
5940; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
5941; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
5942; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
5943; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
5944; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i512 [[TMP12]], 0
5945; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
5946; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
5947; CHECK:       13:
5948; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5949; CHECK-NEXT:    unreachable
5950; CHECK:       14:
5951; CHECK-NEXT:    [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x double> [[X2]], i8 -1, i32 8)
5952; CHECK-NEXT:    [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
5953; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
5954; CHECK-NEXT:    ret <8 x double> [[RES2]]
5955;
5956  %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 11)
5957  %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 8)
5958  %res2 = fadd <8 x double> %res, %res1
5959  ret <8 x double> %res2
5960}
5961
; mask.scalef.ps.512: same strict handling as the pd variant, but with
; <16 x i32> shadows and an i16 mask shadow check before the masked call.
5962declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
5963define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) #0 {
5964; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512(
5965; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
5966; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
5967; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
5968; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
5969; CHECK-NEXT:    call void @llvm.donothing()
5970; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
5971; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
5972; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
5973; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
5974; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
5975; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
5976; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
5977; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
5978; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0
5979; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
5980; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5981; CHECK:       8:
5982; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5983; CHECK-NEXT:    unreachable
5984; CHECK:       9:
5985; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x float> [[X2:%.*]], i16 [[X3:%.*]], i32 10)
5986; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
5987; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
5988; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
5989; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
5990; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
5991; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
5992; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i512 [[TMP12]], 0
5993; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
5994; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
5995; CHECK:       13:
5996; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
5997; CHECK-NEXT:    unreachable
5998; CHECK:       14:
5999; CHECK-NEXT:    [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x float> [[X2]], i16 -1, i32 8)
6000; CHECK-NEXT:    [[RES2:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
6001; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
6002; CHECK-NEXT:    ret <16 x float> [[RES2]]
6003;
6004  %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 10)
6005  %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 8)
6006  %res2 = fadd <16 x float> %res, %res1
6007  ret <16 x float> %res2
6008}
6009
6010declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
6011
; mask.pmov.qb.512 (truncating q->b move): each of the three calls gets a
; strict shadow check over exactly the operands it uses — the third call
; passes a zeroinitializer passthru, so only %x0's and %x2's shadows are
; checked for it. Return shadow is cleared.
6012define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 {
6013; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qb_512(
6014; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
6015; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
6016; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
6017; CHECK-NEXT:    call void @llvm.donothing()
6018; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
6019; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
6020; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6021; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
6022; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
6023; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
6024; CHECK:       6:
6025; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
6026; CHECK-NEXT:    unreachable
6027; CHECK:       7:
6028; CHECK-NEXT:    [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1)
6029; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
6030; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
6031; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6032; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
6033; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
6034; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
6035; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
6036; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
6037; CHECK:       10:
6038; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
6039; CHECK-NEXT:    unreachable
6040; CHECK:       11:
6041; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]])
6042; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
6043; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
6044; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
6045; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
6046; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
6047; CHECK:       13:
6048; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
6049; CHECK-NEXT:    unreachable
6050; CHECK:       14:
6051; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]])
6052; CHECK-NEXT:    [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
6053; CHECK-NEXT:    [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
6054; CHECK-NEXT:    store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
6055; CHECK-NEXT:    ret <16 x i8> [[RES4]]
6056;
6057  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
6058  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
6059  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
6060  %res3 = add <16 x i8> %res0, %res1
6061  %res4 = add <16 x i8> %res3, %res2
6062  ret <16 x i8> %res4
6063}
6064
6065declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64>, i8)
6066
; Memory form of mask.pmov.qb.512: the destination pointer's shadow and the
; vector's shadow are checked before each call; the second call additionally
; checks the i8 mask's shadow. No return shadow (void).
6067define void @test_int_x86_avx512_mask_pmov_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
6068; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qb_mem_512(
6069; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6070; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
6071; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
6072; CHECK-NEXT:    call void @llvm.donothing()
6073; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6074; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
6075; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
6076; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
6077; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
6078; CHECK:       5:
6079; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
6080; CHECK-NEXT:    unreachable
6081; CHECK:       6:
6082; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
6083; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
6084; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
6085; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
6086; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
6087; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
6088; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
6089; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
6090; CHECK:       8:
6091; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
6092; CHECK-NEXT:    unreachable
6093; CHECK:       9:
6094; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
6095; CHECK-NEXT:    ret void
6096;
6097  call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
6098  call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
6099  ret void
6100}
6101
6102declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
6103
; Saturating variant (pmovs.qb.512): instrumentation shape is identical to
; the non-saturating pmov test above — strict per-call operand shadow checks
; and a cleared return shadow.
6104define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 {
6105; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qb_512(
6106; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
6107; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
6108; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
6109; CHECK-NEXT:    call void @llvm.donothing()
6110; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
6111; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
6112; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6113; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
6114; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
6115; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
6116; CHECK:       6:
6117; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
6118; CHECK-NEXT:    unreachable
6119; CHECK:       7:
6120; CHECK-NEXT:    [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1)
6121; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
6122; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
6123; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
6124; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
6125; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
6126; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
6127; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
6128; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
6129; CHECK:       10:
6130; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
6131; CHECK-NEXT:    unreachable
6132; CHECK:       11:
6133; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]])
6134; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
6135; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
6136; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
6137; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
6138; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
6139; CHECK:       13:
6140; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
6141; CHECK-NEXT:    unreachable
6142; CHECK:       14:
6143; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]])
6144; CHECK-NEXT:    [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
6145; CHECK-NEXT:    [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
6146; CHECK-NEXT:    store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
6147; CHECK-NEXT:    ret <16 x i8> [[RES4]]
6148;
6149  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
6150  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
6151  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
6152  %res3 = add <16 x i8> %res0, %res1
6153  %res4 = add <16 x i8> %res3, %res2
6154  ret <16 x i8> %res4
6155}
6156
6157declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64>, i8)
6158
; Memory-writing form: before both calls MSan checks the shadow of %ptr and
; the <8 x i64> source; before the second (variable-mask) call it additionally
; checks the i8 mask shadow. No return shadow is produced (void result).
define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qb_mem_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    ret void
;
  call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
6193
6194declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
6195
; Same pattern as the pmovs.qb test, for the unsigned-saturating variant:
; each call to llvm.x86.avx512.mask.pmovus.qb.512 is guarded by a strict
; shadow check of its non-constant operands, and the return shadow is cleared
; (zeroinitializer stored to @__msan_retval_tls).
define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qb_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK:       13:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       14:
; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]])
; CHECK-NEXT:    [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
; CHECK-NEXT:    [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
; CHECK-NEXT:    store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[RES4]]
;
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}
6248
6249declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64>, i8)
6250
; Memory-writing unsigned-saturating variant: shadow of %ptr and the vector
; source is checked before both calls; the i8 mask shadow is only checked
; before the second call, where the mask is non-constant.
define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qb_mem_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    ret void
;
  call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
6285
6286declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
6287
; qword-to-word truncating move: llvm.x86.avx512.mask.pmov.qw.512 is handled
; as an opaque intrinsic, so each call gets a strict shadow check of its
; non-constant operands and the return shadow is cleared via a
; zeroinitializer store to @__msan_retval_tls.
define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qw_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK:       13:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       14:
; CHECK-NEXT:    [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]])
; CHECK-NEXT:    [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]]
; CHECK-NEXT:    [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]]
; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[RES4]]
;
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
  %res3 = add <8 x i16> %res0, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}
6340
6341declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64>, i8)
6342
; Memory-writing qword-to-word move: %ptr and vector shadows are checked
; before both calls; the mask shadow is additionally checked before the
; second call only (the first call's mask is the constant -1).
define void @test_int_x86_avx512_mask_pmov_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qw_mem_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    ret void
;
  call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
6377
6378declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
6379
; Signed-saturating qword-to-word move: same opaque-intrinsic handling as the
; pmov.qw test — strict shadow checks before each call (skipping constant
; operands) and a cleared return shadow in @__msan_retval_tls.
define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qw_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK:       13:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       14:
; CHECK-NEXT:    [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]])
; CHECK-NEXT:    [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]]
; CHECK-NEXT:    [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]]
; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[RES4]]
;
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
  %res3 = add <8 x i16> %res0, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}
6432
6433declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64>, i8)
6434
; Memory-writing signed-saturating variant: ptr and vector shadows checked
; before both calls; the mask shadow joins the check only for the second,
; variable-mask call.
define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qw_mem_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    ret void
;
  call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
6469
6470declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
6471
; Unsigned-saturating qword-to-word move: opaque-intrinsic handling — strict
; shadow checks of non-constant operands before each of the three calls and a
; cleared (zeroinitializer) return shadow.
define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qw_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK:       13:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       14:
; CHECK-NEXT:    [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]])
; CHECK-NEXT:    [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]]
; CHECK-NEXT:    [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]]
; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[RES4]]
;
  %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
  %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
  %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
  %res3 = add <8 x i16> %res0, %res1
  %res4 = add <8 x i16> %res3, %res2
  ret <8 x i16> %res4
}
6524
6525declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64>, i8)
6526
; Memory-writing unsigned-saturating qword-to-word move: same two-call
; pattern — ptr and vector shadows checked always, mask shadow checked only
; when the mask is non-constant.
define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qw_mem_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    ret void
;
  call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
6561
; Plain qword-to-dword truncation expressed as a native IR trunc: MSan
; propagates shadow by truncating the operand shadow (_MSPROP) instead of
; emitting any warning branch. %x1 is unused; the IR keeps it to preserve
; the original test's signature.
define <8 x i32>@test_int_x86_avx512_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 {
; CHECK-LABEL: @test_int_x86_avx512_pmov_qd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32>
; CHECK-NEXT:    store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = trunc <8 x i64> %x0 to <8 x i32>
  ret <8 x i32> %1
}
6574
; Masked trunc expressed as trunc + bitcast-mask + select: MSan propagates
; shadow through the select — lanes are chosen from the operands' shadows,
; and where the mask itself is poisoned the shadow is forced to all-ones via
; the xor/or chain, with no warning branch emitted.
define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[_MSPROP]], <8 x i32> [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <8 x i32> [[TMP4]], [[X1:%.*]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[_MSPROP]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i32> [[TMP9]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP10]], <8 x i32> [[TMP7]]
; CHECK-NEXT:    [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP4]], <8 x i32> [[X1]]
; CHECK-NEXT:    store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[TMP11]]
;
  %1 = trunc <8 x i64> %x0 to <8 x i32>
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
  ret <8 x i32> %3
}
6599
; Zero-masked trunc: identical shadow-select propagation to the masked test
; above, except the false arm (and its shadow) is the constant
; zeroinitializer instead of a passthru argument.
define <8 x i32>@test_int_x86_avx512_maskz_pmov_qd_512(<8 x i64> %x0,  i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_maskz_pmov_qd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[_MSPROP]], <8 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <8 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i32> [[TMP7]], [[_MSPROP]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i32> [[TMP9]], <8 x i32> [[TMP6]]
; CHECK-NEXT:    [[TMP10:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP3]], <8 x i32> zeroinitializer
; CHECK-NEXT:    store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[TMP10]]
;
  %1 = trunc <8 x i64> %x0 to <8 x i32>
  %2 = bitcast i8 %x2 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
  ret <8 x i32> %3
}
6623
6624declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64>, i8)
6625
; Memory-writing qword-to-dword move: unlike the register pmov.qd tests above
; (lowered to native trunc/select), the .mem intrinsic stays opaque, so MSan
; emits strict shadow checks — ptr and vector before both calls, plus the
; mask before the second, variable-mask call.
define void @test_int_x86_avx512_mask_pmov_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qd_mem_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    ret void
;
  call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
  call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
6660
6661declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
6662
define <8 x i32>@test_int_x86_avx512_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 {
; CHECK-LABEL: @test_int_x86_avx512_pmovs_qd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  ; Unmasked form (mask = -1): the shadows of %x0 and %x1 are checked up
  ; front and the result shadow is zero (fully initialized) in retval TLS.
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}
6685
define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]])
; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  ; Masked form: the shadows of all three operands (%x0, %x1, %x2) are
  ; checked; the result shadow stored to retval TLS is zero.
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}
6711
define <8 x i32>@test_int_x86_avx512_maskz_pmovs_qd_512(<8 x i64> %x0, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovs_qd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK:       4:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       5:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[X2:%.*]])
; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  ; Zero-masking form: the passthru is a clean constant, so only the
  ; shadows of %x0 and the mask %x2 are checked.
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}
6733
6734declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64>, i8)
6735
define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qd_mem_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    ret void
;
  ; Constant all-ones mask: only the %ptr and %x1 shadows are checked.
  call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
  ; Variable mask: the %x2 shadow is additionally checked (_MSCMP5).
  call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
6770
6771declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
6772
define <8 x i32>@test_int_x86_avx512_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 {
; CHECK-LABEL: @test_int_x86_avx512_pmovus_qd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  ; Unmasked form (mask = -1): the shadows of %x0 and %x1 are checked up
  ; front and the result shadow is zero (fully initialized) in retval TLS.
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
  ret <8 x i32> %res
}
6795
define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]])
; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  ; Masked form: the shadows of all three operands (%x0, %x1, %x2) are
  ; checked; the result shadow stored to retval TLS is zero.
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}
6821
define <8 x i32>@test_int_x86_avx512_maskz_pmovus_qd_512(<8 x i64> %x0, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovus_qd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK:       4:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       5:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[X2:%.*]])
; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  ; Zero-masking form: the passthru is a clean constant, so only the
  ; shadows of %x0 and the mask %x2 are checked.
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}
6843
6844declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64>, i8)
6845
define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qd_mem_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
; CHECK-NEXT:    ret void
;
  ; Constant all-ones mask: only the %ptr and %x1 shadows are checked.
  call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
  ; Variable mask: the %x2 shadow is additionally checked (_MSCMP5).
  call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
  ret void
}
6880
6881declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
6882
define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_db_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK:       13:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       14:
; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]])
; CHECK-NEXT:    [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
; CHECK-NEXT:    [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
; CHECK-NEXT:    store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[RES4]]
;
  ; Unmasked: shadows of %x0 and %x1 checked.
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  ; Masked: shadow of %x2 checked in addition (_MSCMP5).
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  ; Zero-masking: clean constant passthru, so only %x0 and %x2 shadows checked.
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}
6935
6936declare void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32>, i16)
6937
define void @test_int_x86_avx512_mask_pmov_db_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_db_mem_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
; CHECK-NEXT:    ret void
;
  ; Constant all-ones mask: only the %ptr and %x1 shadows are checked.
  call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
  ; Variable mask: the %x2 shadow is additionally checked (_MSCMP5).
  call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}
6972
6973declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
6974
define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_db_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK:       13:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       14:
; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]])
; CHECK-NEXT:    [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
; CHECK-NEXT:    [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
; CHECK-NEXT:    store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[RES4]]
;
  ; Unmasked: shadows of %x0 and %x1 checked.
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  ; Masked: shadow of %x2 checked in addition (_MSCMP5).
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  ; Zero-masking: clean constant passthru, so only %x0 and %x2 shadows checked.
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}
7027
7028declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32>, i16)
7029
define void @test_int_x86_avx512_mask_pmovs_db_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_db_mem_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
; CHECK-NEXT:    ret void
;
  ; Constant all-ones mask: only the %ptr and %x1 shadows are checked.
  call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
  ; Variable mask: the %x2 shadow is additionally checked (_MSCMP5).
  call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}
7064
7065declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
7066
define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_db_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK:       13:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       14:
; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]])
; CHECK-NEXT:    [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
; CHECK-NEXT:    [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
; CHECK-NEXT:    store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[RES4]]
;
  ; Unmasked: shadows of %x0 and %x1 checked.
  %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
  ; Masked: shadow of %x2 checked in addition (_MSCMP5).
  %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
  ; Zero-masking: clean constant passthru, so only %x0 and %x2 shadows checked.
  %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
  %res3 = add <16 x i8> %res0, %res1
  %res4 = add <16 x i8> %res3, %res2
  ret <16 x i8> %res4
}
7119
7120declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32>, i16)
7121
define void @test_int_x86_avx512_mask_pmovus_db_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_db_mem_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
; CHECK-NEXT:    ret void
;
  ; Constant all-ones mask: only the %ptr and %x1 shadows are checked.
  call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
  ; Variable mask: the %x2 shadow is additionally checked (_MSCMP5).
  call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
  ret void
}
7156
7157declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
7158
; Verifies MSan instrumentation of the value-returning pmov.dw intrinsic for
; its unmasked (-1), masked, and zero-passthru forms: each call checks the
; or-combined operand shadows (x0 as i512 always; x1 as i256 for the first two
; calls; the i16 mask shadow for the masked calls) and branches to
; __msan_warning_noreturn under !prof [[PROF1]]. The return shadow written to
; __msan_retval_tls is zeroinitializer, i.e. the result is treated as fully
; initialized.
7159define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 {
7160; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_dw_512(
7161; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
7162; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7163; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
7164; CHECK-NEXT:    call void @llvm.donothing()
7165; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7166; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7167; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
7168; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
7169; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7170; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7171; CHECK:       6:
7172; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7173; CHECK-NEXT:    unreachable
7174; CHECK:       7:
7175; CHECK-NEXT:    [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1)
7176; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7177; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
7178; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
7179; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0
7180; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
7181; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
7182; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
7183; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7184; CHECK:       10:
7185; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7186; CHECK-NEXT:    unreachable
7187; CHECK:       11:
7188; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]])
7189; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7190; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
7191; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
7192; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
7193; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7194; CHECK:       13:
7195; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7196; CHECK-NEXT:    unreachable
7197; CHECK:       14:
7198; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]])
7199; CHECK-NEXT:    [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]]
7200; CHECK-NEXT:    [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]]
7201; CHECK-NEXT:    store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
7202; CHECK-NEXT:    ret <16 x i16> [[RES4]]
7203;
7204  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
7205  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
7206  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
7207  %res3 = add <16 x i16> %res0, %res1
7208  %res4 = add <16 x i16> %res3, %res2
7209  ret <16 x i16> %res4
7210}
7211
7212declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32>, i16)
7213
; Verifies MSan instrumentation of the store-form pmov.dw intrinsic: shadows
; of %ptr (i64) and %x1 (i512 bitcast) are checked before each call, with the
; i16 %x2 mask shadow additionally checked for the masked (second) call only.
; NOTE(review): no shadow store for the pointee memory appears in the CHECKs.
7214define void @test_int_x86_avx512_mask_pmov_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
7215; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_dw_mem_512(
7216; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7217; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7218; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
7219; CHECK-NEXT:    call void @llvm.donothing()
7220; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7221; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7222; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
7223; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7224; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
7225; CHECK:       5:
7226; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7227; CHECK-NEXT:    unreachable
7228; CHECK:       6:
7229; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
7230; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
7231; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7232; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
7233; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
7234; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
7235; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
7236; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
7237; CHECK:       8:
7238; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7239; CHECK-NEXT:    unreachable
7240; CHECK:       9:
7241; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
7242; CHECK-NEXT:    ret void
7243;
7244  call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
7245  call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
7246  ret void
7247}
7248
7249declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
7250
; Verifies MSan instrumentation of the value-returning pmovs.dw intrinsic for
; its unmasked (-1), masked, and zero-passthru forms: each call or-combines
; its operand shadows (x0 as i512 always; x1 as i256 for the first two calls;
; the i16 mask shadow for the masked calls) into a guarded branch to
; __msan_warning_noreturn. The return shadow stored to __msan_retval_tls is
; zeroinitializer.
7251define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 {
7252; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_dw_512(
7253; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
7254; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7255; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
7256; CHECK-NEXT:    call void @llvm.donothing()
7257; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7258; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7259; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
7260; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
7261; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7262; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7263; CHECK:       6:
7264; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7265; CHECK-NEXT:    unreachable
7266; CHECK:       7:
7267; CHECK-NEXT:    [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1)
7268; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7269; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
7270; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
7271; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0
7272; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
7273; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
7274; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
7275; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7276; CHECK:       10:
7277; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7278; CHECK-NEXT:    unreachable
7279; CHECK:       11:
7280; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]])
7281; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7282; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
7283; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
7284; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
7285; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7286; CHECK:       13:
7287; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7288; CHECK-NEXT:    unreachable
7289; CHECK:       14:
7290; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]])
7291; CHECK-NEXT:    [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]]
7292; CHECK-NEXT:    [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]]
7293; CHECK-NEXT:    store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
7294; CHECK-NEXT:    ret <16 x i16> [[RES4]]
7295;
7296  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
7297  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
7298  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
7299  %res3 = add <16 x i16> %res0, %res1
7300  %res4 = add <16 x i16> %res3, %res2
7301  ret <16 x i16> %res4
7302}
7303
7304declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32>, i16)
7305
; Verifies MSan instrumentation of the store-form pmovs.dw intrinsic: shadows
; of %ptr (i64) and %x1 (i512 bitcast) are checked before each call, with the
; i16 %x2 mask shadow additionally checked for the masked (second) call only.
; NOTE(review): no shadow store for the pointee memory appears in the CHECKs.
7306define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
7307; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_dw_mem_512(
7308; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7309; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7310; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
7311; CHECK-NEXT:    call void @llvm.donothing()
7312; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7313; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7314; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
7315; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7316; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
7317; CHECK:       5:
7318; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7319; CHECK-NEXT:    unreachable
7320; CHECK:       6:
7321; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
7322; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
7323; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7324; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
7325; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
7326; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
7327; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
7328; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
7329; CHECK:       8:
7330; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7331; CHECK-NEXT:    unreachable
7332; CHECK:       9:
7333; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
7334; CHECK-NEXT:    ret void
7335;
7336  call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
7337  call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
7338  ret void
7339}
7340
7341declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
7342
; Verifies MSan instrumentation of the value-returning pmovus.dw intrinsic for
; its unmasked (-1), masked, and zero-passthru forms: each call or-combines
; its operand shadows (x0 as i512 always; x1 as i256 for the first two calls;
; the i16 mask shadow for the masked calls) into a guarded branch to
; __msan_warning_noreturn. The return shadow stored to __msan_retval_tls is
; zeroinitializer.
7343define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 {
7344; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_dw_512(
7345; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
7346; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7347; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
7348; CHECK-NEXT:    call void @llvm.donothing()
7349; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7350; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7351; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
7352; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
7353; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7354; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7355; CHECK:       6:
7356; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7357; CHECK-NEXT:    unreachable
7358; CHECK:       7:
7359; CHECK-NEXT:    [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1)
7360; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7361; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
7362; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
7363; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0
7364; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
7365; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
7366; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
7367; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7368; CHECK:       10:
7369; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7370; CHECK-NEXT:    unreachable
7371; CHECK:       11:
7372; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]])
7373; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7374; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
7375; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
7376; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
7377; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7378; CHECK:       13:
7379; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7380; CHECK-NEXT:    unreachable
7381; CHECK:       14:
7382; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]])
7383; CHECK-NEXT:    [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]]
7384; CHECK-NEXT:    [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]]
7385; CHECK-NEXT:    store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
7386; CHECK-NEXT:    ret <16 x i16> [[RES4]]
7387;
7388  %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
7389  %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
7390  %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
7391  %res3 = add <16 x i16> %res0, %res1
7392  %res4 = add <16 x i16> %res3, %res2
7393  ret <16 x i16> %res4
7394}
7395
7396declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32>, i16)
7397
; Verifies MSan instrumentation of the store-form pmovus.dw intrinsic: shadows
; of %ptr (i64) and %x1 (i512 bitcast) are checked before each call, with the
; i16 %x2 mask shadow additionally checked for the masked (second) call only.
; NOTE(review): no shadow store for the pointee memory appears in the CHECKs.
7398define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
7399; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_dw_mem_512(
7400; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7401; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7402; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
7403; CHECK-NEXT:    call void @llvm.donothing()
7404; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7405; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7406; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
7407; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7408; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
7409; CHECK:       5:
7410; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7411; CHECK-NEXT:    unreachable
7412; CHECK:       6:
7413; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
7414; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
7415; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7416; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
7417; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
7418; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
7419; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
7420; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
7421; CHECK:       8:
7422; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7423; CHECK-NEXT:    unreachable
7424; CHECK:       9:
7425; CHECK-NEXT:    call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
7426; CHECK-NEXT:    ret void
7427;
7428  call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
7429  call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
7430  ret void
7431}
7432
7433declare <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32>, i32)
7434
; Verifies MSan instrumentation of a sitofp + mask-select blend followed by the
; rounding sitofp intrinsic: the select's shadow is propagated precisely
; (select on the concrete mask picks operand shadows; xor/or of both value
; bitcasts covers lanes where the mask shadow itself is poisoned, selected by
; the mask's shadow bits). The intrinsic call only checks that x0's shadow is
; zero, and the blend's propagated shadow (or'ed with zeroinitializer for the
; fadd) is stored to __msan_retval_tls.
7435define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) #0 {
7436; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_dq2ps_512(
7437; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
7438; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
7439; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7440; CHECK-NEXT:    call void @llvm.donothing()
7441; CHECK-NEXT:    [[CVT:%.*]] = sitofp <16 x i32> [[X0:%.*]] to <16 x float>
7442; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
7443; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
7444; CHECK-NEXT:    [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]]
7445; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x float> [[CVT]] to <16 x i32>
7446; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32>
7447; CHECK-NEXT:    [[TMP9:%.*]] = xor <16 x i32> [[TMP7]], [[TMP8]]
7448; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP1]]
7449; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]]
7450; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP11]], <16 x i32> [[TMP6]]
7451; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[CVT]], <16 x float> [[X1]]
7452; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7453; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP13]], 0
7454; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
7455; CHECK:       14:
7456; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7457; CHECK-NEXT:    unreachable
7458; CHECK:       15:
7459; CHECK-NEXT:    [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> [[X0]], i32 8)
7460; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[_MSPROP_SELECT]], zeroinitializer
7461; CHECK-NEXT:    [[RES2:%.*]] = fadd <16 x float> [[TMP12]], [[TMP16]]
7462; CHECK-NEXT:    store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
7463; CHECK-NEXT:    ret <16 x float> [[RES2]]
7464;
7465  %cvt = sitofp <16 x i32> %x0 to <16 x float>
7466  %1 = bitcast i16 %x2 to <16 x i1>
7467  %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
7468  %3 = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8)
7469  %res2 = fadd <16 x float> %2, %3
7470  ret <16 x float> %res2
7471}
7472
7473declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
7474
; Verifies MSan instrumentation of cvtpd2dq with rounding operand: the masked
; call checks x0, x1 and the i8 mask shadows; the unmasked (-1) call checks
; only x0 and x1. Return shadow stored to __msan_retval_tls is
; zeroinitializer.
7475define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
7476; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2dq_512(
7477; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
7478; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7479; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
7480; CHECK-NEXT:    call void @llvm.donothing()
7481; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
7482; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7483; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
7484; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
7485; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7486; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
7487; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
7488; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7489; CHECK:       6:
7490; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7491; CHECK-NEXT:    unreachable
7492; CHECK:       7:
7493; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
7494; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
7495; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
7496; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
7497; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
7498; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
7499; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7500; CHECK:       10:
7501; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7502; CHECK-NEXT:    unreachable
7503; CHECK:       11:
7504; CHECK-NEXT:    [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
7505; CHECK-NEXT:    [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
7506; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
7507; CHECK-NEXT:    ret <8 x i32> [[RES2]]
7508;
7509  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
7510  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
7511  %res2 = add <8 x i32> %res, %res1
7512  ret <8 x i32> %res2
7513}
7514
7515declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
7516
; Verifies MSan instrumentation of cvtpd2ps with rounding operand: the masked
; call checks x0, x1 and the i8 mask shadows; the unmasked (-1) call checks
; only x0 and x1. Return shadow stored to __msan_retval_tls is
; zeroinitializer.
7517define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) #0 {
7518; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2ps_512(
7519; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
7520; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7521; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
7522; CHECK-NEXT:    call void @llvm.donothing()
7523; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
7524; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7525; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
7526; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
7527; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7528; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
7529; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
7530; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7531; CHECK:       6:
7532; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7533; CHECK-NEXT:    unreachable
7534; CHECK:       7:
7535; CHECK-NEXT:    [[RES:%.*]] = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> [[X0:%.*]], <8 x float> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
7536; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
7537; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
7538; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
7539; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
7540; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
7541; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7542; CHECK:       10:
7543; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7544; CHECK-NEXT:    unreachable
7545; CHECK:       11:
7546; CHECK-NEXT:    [[RES1:%.*]] = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> [[X0]], <8 x float> [[X1]], i8 -1, i32 10)
7547; CHECK-NEXT:    [[RES2:%.*]] = fadd <8 x float> [[RES]], [[RES1]]
7548; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
7549; CHECK-NEXT:    ret <8 x float> [[RES2]]
7550;
7551  %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
7552  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 10)
7553  %res2 = fadd <8 x float> %res, %res1
7554  ret <8 x float> %res2
7555}
7556
7557declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
7558
; Verifies MSan instrumentation of cvtpd2udq with rounding operand: the masked
; call checks x0, x1 and the i8 mask shadows; the unmasked (-1) call checks
; only x0 and x1. Return shadow stored to __msan_retval_tls is
; zeroinitializer.
7559define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
7560; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2udq_512(
7561; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
7562; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7563; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
7564; CHECK-NEXT:    call void @llvm.donothing()
7565; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
7566; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7567; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
7568; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
7569; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7570; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
7571; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
7572; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7573; CHECK:       6:
7574; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7575; CHECK-NEXT:    unreachable
7576; CHECK:       7:
7577; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 10)
7578; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
7579; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
7580; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
7581; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
7582; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
7583; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7584; CHECK:       10:
7585; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7586; CHECK-NEXT:    unreachable
7587; CHECK:       11:
7588; CHECK-NEXT:    [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
7589; CHECK-NEXT:    [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
7590; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
7591; CHECK-NEXT:    ret <8 x i32> [[RES2]]
7592;
7593  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 10)
7594  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
7595  %res2 = add <8 x i32> %res, %res1
7596  ret <8 x i32> %res2
7597}
7598
7599declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)
7600
; Verifies MSan instrumentation of cvtps2dq with rounding operand: the masked
; call checks x0, x1 and the i16 mask shadows; the unmasked (-1) call checks
; only x0 and x1. Return shadow stored to __msan_retval_tls is
; zeroinitializer.
7601define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
7602; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2dq_512(
7603; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
7604; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7605; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
7606; CHECK-NEXT:    call void @llvm.donothing()
7607; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7608; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7609; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7610; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
7611; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7612; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
7613; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
7614; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7615; CHECK:       6:
7616; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7617; CHECK-NEXT:    unreachable
7618; CHECK:       7:
7619; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 10)
7620; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7621; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
7622; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7623; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
7624; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
7625; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7626; CHECK:       10:
7627; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7628; CHECK-NEXT:    unreachable
7629; CHECK:       11:
7630; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
7631; CHECK-NEXT:    [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
7632; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
7633; CHECK-NEXT:    ret <16 x i32> [[RES2]]
7634;
7635  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10)
7636  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
7637  %res2 = add <16 x i32> %res, %res1
7638  ret <16 x i32> %res2
7639}
7640
7641declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)
7642
; MSan instrumentation test for @llvm.x86.avx512.mask.cvtps2pd.512.
; Same strict-check pattern as the ps2dq test, but with an <8 x i32>/i256
; shadow for the <8 x float> source, an <8 x i64>/i512 shadow for the
; <8 x double> passthru at TLS offset 32, and an i8 mask shadow at offset 96.
7643define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) #0 {
7644; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2pd_512(
7645; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
7646; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
7647; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
7648; CHECK-NEXT:    call void @llvm.donothing()
7649; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
7650; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0
7651; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
7652; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
7653; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7654; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
7655; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
7656; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7657; CHECK:       6:
7658; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7659; CHECK-NEXT:    unreachable
7660; CHECK:       7:
7661; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> [[X0:%.*]], <8 x double> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
7662; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
7663; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0
7664; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
7665; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
7666; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
7667; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7668; CHECK:       10:
7669; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7670; CHECK-NEXT:    unreachable
7671; CHECK:       11:
7672; CHECK-NEXT:    [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> [[X0]], <8 x double> [[X1]], i8 -1, i32 8)
7673; CHECK-NEXT:    [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
7674; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
7675; CHECK-NEXT:    ret <8 x double> [[RES2]]
7676;
7677  %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
7678  %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
7679  %res2 = fadd <8 x double> %res, %res1
7680  ret <8 x double> %res2
7681}
7682
7683declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
7684
; MSan instrumentation test for @llvm.x86.avx512.mask.cvtps2udq.512
; (unsigned variant). Expected instrumentation is identical in shape to the
; cvtps2dq test above: strict shadow checks before each call, mask shadow
; checked only for the variable-mask call, clean return shadow.
7685define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
7686; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2udq_512(
7687; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
7688; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7689; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
7690; CHECK-NEXT:    call void @llvm.donothing()
7691; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7692; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7693; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7694; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
7695; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7696; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
7697; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
7698; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7699; CHECK:       6:
7700; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7701; CHECK-NEXT:    unreachable
7702; CHECK:       7:
7703; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 10)
7704; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7705; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
7706; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7707; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
7708; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
7709; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7710; CHECK:       10:
7711; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7712; CHECK-NEXT:    unreachable
7713; CHECK:       11:
7714; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
7715; CHECK-NEXT:    [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
7716; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
7717; CHECK-NEXT:    ret <16 x i32> [[RES2]]
7718;
7719  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10)
7720  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
7721  %res2 = add <16 x i32> %res, %res1
7722  ret <16 x i32> %res2
7723}
7724
7725declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
7726
; MSan instrumentation test for @llvm.x86.avx512.mask.cvttpd2dq.512
; (truncating double->i32 conversion). Source shadow is <8 x i64>/i512,
; passthru shadow <8 x i32>/i256, mask shadow i8 at TLS offset 96; both
; calls are strictly checked and the return shadow is clean.
7727define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
7728; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_pd2dq_512(
7729; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
7730; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7731; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
7732; CHECK-NEXT:    call void @llvm.donothing()
7733; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
7734; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7735; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
7736; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
7737; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7738; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
7739; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
7740; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7741; CHECK:       6:
7742; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7743; CHECK-NEXT:    unreachable
7744; CHECK:       7:
7745; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
7746; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
7747; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
7748; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
7749; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
7750; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
7751; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7752; CHECK:       10:
7753; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7754; CHECK-NEXT:    unreachable
7755; CHECK:       11:
7756; CHECK-NEXT:    [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
7757; CHECK-NEXT:    [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
7758; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
7759; CHECK-NEXT:    ret <8 x i32> [[RES2]]
7760;
7761  %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
7762  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
7763  %res2 = add <8 x i32> %res, %res1
7764  ret <8 x i32> %res2
7765}
7766
7767declare <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32>, i32)
7768
; MSan instrumentation test for the udq2ps pattern. Unlike the mask.*
; intrinsics above, the IR here is plain `uitofp` plus a mask `select`,
; so MSan PROPAGATES shadow instead of doing a strict check: the select's
; shadow combines both arms via xor/or and is stored (as _MSPROP) to
; __msan_retval_tls. Only the explicitly-rounded uitofp.round intrinsic
; call gets a strict check on x0's shadow.
7769define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) #0 {
7770; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_udq2ps_512(
7771; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
7772; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
7773; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7774; CHECK-NEXT:    call void @llvm.donothing()
7775; CHECK-NEXT:    [[CVT:%.*]] = uitofp <16 x i32> [[X0:%.*]] to <16 x float>
7776; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
7777; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
7778; CHECK-NEXT:    [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]]
7779; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x float> [[CVT]] to <16 x i32>
7780; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32>
7781; CHECK-NEXT:    [[TMP9:%.*]] = xor <16 x i32> [[TMP7]], [[TMP8]]
7782; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP1]]
7783; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]]
7784; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP11]], <16 x i32> [[TMP6]]
7785; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[CVT]], <16 x float> [[X1]]
7786; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7787; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP13]], 0
7788; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
7789; CHECK:       14:
7790; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7791; CHECK-NEXT:    unreachable
7792; CHECK:       15:
7793; CHECK-NEXT:    [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> [[X0]], i32 8)
7794; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[_MSPROP_SELECT]], zeroinitializer
7795; CHECK-NEXT:    [[RES2:%.*]] = fadd <16 x float> [[TMP12]], [[TMP16]]
7796; CHECK-NEXT:    store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
7797; CHECK-NEXT:    ret <16 x float> [[RES2]]
7798;
7799  %cvt = uitofp <16 x i32> %x0 to <16 x float>
7800  %1 = bitcast i16 %x2 to <16 x i1>
7801  %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
7802  %3 = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8)
7803  %res2 = fadd <16 x float> %2, %3
7804  ret <16 x float> %res2
7805}
7806
7807declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
7808
; MSan instrumentation test for @llvm.x86.avx512.mask.cvttpd2udq.512
; (unsigned truncating variant). Same expected instrumentation shape as
; the cvttpd2dq test above: strict checks before both calls, i8 mask
; shadow checked only for the variable-mask call, clean return shadow.
7809define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
7810; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_pd2udq_512(
7811; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
7812; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7813; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
7814; CHECK-NEXT:    call void @llvm.donothing()
7815; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
7816; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7817; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
7818; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
7819; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7820; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
7821; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
7822; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7823; CHECK:       6:
7824; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7825; CHECK-NEXT:    unreachable
7826; CHECK:       7:
7827; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
7828; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
7829; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
7830; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
7831; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
7832; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
7833; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7834; CHECK:       10:
7835; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7836; CHECK-NEXT:    unreachable
7837; CHECK:       11:
7838; CHECK-NEXT:    [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
7839; CHECK-NEXT:    [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
7840; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
7841; CHECK-NEXT:    ret <8 x i32> [[RES2]]
7842;
7843  %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
7844  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
7845  %res2 = add <8 x i32> %res, %res1
7846  ret <8 x i32> %res2
7847}
7848
7849declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
7850
; MSan instrumentation test for @llvm.x86.avx512.mask.cvttps2dq.512
; (truncating float->i32 conversion, i16 mask). Same strict-check shape
; as the cvtps2dq test; both calls use i512-widened shadow comparisons
; and the return shadow is clean.
7851define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
7852; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_ps2dq_512(
7853; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
7854; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7855; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
7856; CHECK-NEXT:    call void @llvm.donothing()
7857; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7858; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7859; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7860; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
7861; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7862; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
7863; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
7864; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7865; CHECK:       6:
7866; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7867; CHECK-NEXT:    unreachable
7868; CHECK:       7:
7869; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 4)
7870; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7871; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
7872; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7873; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
7874; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
7875; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7876; CHECK:       10:
7877; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7878; CHECK-NEXT:    unreachable
7879; CHECK:       11:
7880; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
7881; CHECK-NEXT:    [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
7882; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
7883; CHECK-NEXT:    ret <16 x i32> [[RES2]]
7884;
7885  %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
7886  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
7887  %res2 = add <16 x i32> %res, %res1
7888  ret <16 x i32> %res2
7889}
7890
7891declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
7892
; MSan instrumentation test for @llvm.x86.avx512.mask.cvttps2udq.512
; (unsigned truncating variant). Expected instrumentation is identical
; in shape to the cvttps2dq test above.
7893define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
7894; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_ps2udq_512(
7895; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
7896; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
7897; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
7898; CHECK-NEXT:    call void @llvm.donothing()
7899; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7900; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
7901; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7902; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
7903; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7904; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
7905; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
7906; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
7907; CHECK:       6:
7908; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7909; CHECK-NEXT:    unreachable
7910; CHECK:       7:
7911; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 4)
7912; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
7913; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
7914; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
7915; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
7916; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
7917; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7918; CHECK:       10:
7919; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7920; CHECK-NEXT:    unreachable
7921; CHECK:       11:
7922; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
7923; CHECK-NEXT:    [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
7924; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
7925; CHECK-NEXT:    ret <16 x i32> [[RES2]]
7926;
7927  %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
7928  %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
7929  %res2 = add <16 x i32> %res, %res1
7930  ret <16 x i32> %res2
7931}
7932
7933declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
7934
; MSan instrumentation test for the unmasked form of
; @llvm.x86.avx512.mask.getexp.ss (constant all-ones mask, zero passthru).
; Only the two <4 x float> operands' shadows are checked (constants
; contribute no shadow); the return shadow is clean.
7935define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1) #0 {
7936; CHECK-LABEL: @test_getexp_ss(
7937; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
7938; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
7939; CHECK-NEXT:    call void @llvm.donothing()
7940; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
7941; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
7942; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
7943; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
7944; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7945; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
7946; CHECK:       5:
7947; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7948; CHECK-NEXT:    unreachable
7949; CHECK:       6:
7950; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8)
7951; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
7952; CHECK-NEXT:    ret <4 x float> [[RES]]
7953;
7954  %res = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
7955  ret <4 x float> %res
7956}
7957
; MSan instrumentation test for the masked form of
; @llvm.x86.avx512.mask.getexp.ss. All four non-constant operands (a0, a1,
; passthru a2, i8 mask) have their shadows checked, and the full check
; chain is repeated before the second call (different rounding mode).
; Return shadow is clean.
7958define <4 x float> @test_mask_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
7959; CHECK-LABEL: @test_mask_getexp_ss(
7960; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
7961; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
7962; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
7963; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
7964; CHECK-NEXT:    call void @llvm.donothing()
7965; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
7966; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
7967; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
7968; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
7969; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
7970; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
7971; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
7972; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
7973; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
7974; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
7975; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
7976; CHECK:       8:
7977; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7978; CHECK-NEXT:    unreachable
7979; CHECK:       9:
7980; CHECK-NEXT:    [[RES0:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
7981; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
7982; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
7983; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
7984; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
7985; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
7986; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
7987; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
7988; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
7989; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
7990; CHECK-NEXT:    [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
7991; CHECK-NEXT:    br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7992; CHECK:       13:
7993; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
7994; CHECK-NEXT:    unreachable
7995; CHECK:       14:
7996; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]], i8 [[MASK]], i32 8)
7997; CHECK-NEXT:    [[RES_1:%.*]] = fadd <4 x float> [[RES0]], [[RES1]]
7998; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
7999; CHECK-NEXT:    ret <4 x float> [[RES_1]]
8000;
8001  %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
8002  %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
8003  %res.1 = fadd <4 x float> %res0, %res1
8004  ret <4 x float> %res.1
8005}
8006
; MSan instrumentation test for the zero-masked form of
; @llvm.x86.avx512.mask.getexp.ss (zero passthru, variable i8 mask).
; Shadows of a0, a1 and the mask are checked; the constant zero passthru
; contributes no shadow check. Return shadow is clean.
8007define <4 x float> @test_maskz_getexp_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
8008; CHECK-LABEL: @test_maskz_getexp_ss(
8009; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
8010; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
8011; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
8012; CHECK-NEXT:    call void @llvm.donothing()
8013; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
8014; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
8015; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
8016; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
8017; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
8018; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
8019; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
8020; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
8021; CHECK:       6:
8022; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
8023; CHECK-NEXT:    unreachable
8024; CHECK:       7:
8025; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8)
8026; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
8027; CHECK-NEXT:    ret <4 x float> [[RES]]
8028;
8029  %res = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
8030  ret <4 x float> %res
8031}
8032
8033declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
8034
; MSan instrumentation test for the unmasked form of
; @llvm.x86.avx512.mask.getexp.sd — the <2 x double> analogue of
; test_getexp_ss. Both operand shadows checked via i128; clean return
; shadow.
8035define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1) #0 {
8036; CHECK-LABEL: @test_getexp_sd(
8037; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
8038; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
8039; CHECK-NEXT:    call void @llvm.donothing()
8040; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
8041; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
8042; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
8043; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
8044; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
8045; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
8046; CHECK:       5:
8047; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
8048; CHECK-NEXT:    unreachable
8049; CHECK:       6:
8050; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 4)
8051; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
8052; CHECK-NEXT:    ret <2 x double> [[RES]]
8053;
8054  %res = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
8055  ret <2 x double> %res
8056}
8057
; MSan instrumentation test for the masked form of
; @llvm.x86.avx512.mask.getexp.sd — the <2 x double> analogue of
; test_mask_getexp_ss. All four operand shadows are checked, and the
; whole chain is repeated before the second (rounded) call. Return
; shadow is clean.
8058define <2 x double> @test_mask_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
8059; CHECK-LABEL: @test_mask_getexp_sd(
8060; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
8061; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
8062; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
8063; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
8064; CHECK-NEXT:    call void @llvm.donothing()
8065; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
8066; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
8067; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
8068; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
8069; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
8070; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
8071; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
8072; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
8073; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
8074; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
8075; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8076; CHECK:       8:
8077; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
8078; CHECK-NEXT:    unreachable
8079; CHECK:       9:
8080; CHECK-NEXT:    [[RES0:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
8081; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
8082; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
8083; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
8084; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
8085; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
8086; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
8087; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
8088; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
8089; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
8090; CHECK-NEXT:    [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
8091; CHECK-NEXT:    br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
8092; CHECK:       13:
8093; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
8094; CHECK-NEXT:    unreachable
8095; CHECK:       14:
8096; CHECK-NEXT:    [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]], i8 [[MASK]], i32 8)
8097; CHECK-NEXT:    [[RES_1:%.*]] = fadd <2 x double> [[RES0]], [[RES1]]
8098; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
8099; CHECK-NEXT:    ret <2 x double> [[RES_1]]
8100;
8101  %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
8102  %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
8103  %res.1 = fadd <2 x double> %res0, %res1
8104  ret <2 x double> %res.1
8105}
; Zero-masked getexp.sd with SAE (rounding arg 8): MSan must check shadows of
; %a0, %a1 and %mask before the call and report if any is uninitialized.
define <2 x double> @test_maskz_getexp_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
; CHECK-LABEL: @test_maskz_getexp_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8)
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
  ret <2 x double> %res
}

declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)

; Masked scalar double compare with SAE: all three operand shadows (%x0, %x1,
; %x3) are checked; the i8 result shadow is fully-initialized (store i8 0).
define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 5, i8 [[X3:%.*]], i32 8)
; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i8 [[RES4]]
;
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
  ret i8 %res4
}

; Four cmp.sd variants (predicates 2..5, mask -1 or %x3, rounding 4 or 8),
; combined with `or`: checks shadow propagation through the bitwise-or
; approximation for the i8 results.
define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_sd_all(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES1:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 2, i8 -1, i32 4)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    br i1 [[_MSOR4]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 3, i8 -1, i32 8)
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i128 [[TMP12]], 0
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP13]], 0
; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]]
; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
; CHECK:       14:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       15:
; CHECK-NEXT:    [[RES3:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 4, i8 [[X3:%.*]], i32 4)
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP10:%.*]] = icmp ne i128 [[TMP16]], 0
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i128 [[TMP17]], 0
; CHECK-NEXT:    [[_MSOR12:%.*]] = or i1 [[_MSCMP10]], [[_MSCMP11]]
; CHECK-NEXT:    [[_MSCMP13:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR14:%.*]] = or i1 [[_MSOR12]], [[_MSCMP13]]
; CHECK-NEXT:    br i1 [[_MSOR14]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
; CHECK:       18:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       19:
; CHECK-NEXT:    [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 5, i8 [[X3]], i32 8)
; CHECK-NEXT:    [[TMP20:%.*]] = xor i8 [[RES1]], -1
; CHECK-NEXT:    [[TMP21:%.*]] = xor i8 [[RES2]], -1
; CHECK-NEXT:    [[TMP22:%.*]] = and i8 [[TMP20]], 0
; CHECK-NEXT:    [[TMP23:%.*]] = and i8 0, [[TMP21]]
; CHECK-NEXT:    [[TMP24:%.*]] = or i8 0, [[TMP22]]
; CHECK-NEXT:    [[TMP25:%.*]] = or i8 [[TMP24]], [[TMP23]]
; CHECK-NEXT:    [[RES11:%.*]] = or i8 [[RES1]], [[RES2]]
; CHECK-NEXT:    [[TMP26:%.*]] = xor i8 [[RES3]], -1
; CHECK-NEXT:    [[TMP27:%.*]] = xor i8 [[RES4]], -1
; CHECK-NEXT:    [[TMP28:%.*]] = and i8 [[TMP26]], 0
; CHECK-NEXT:    [[TMP29:%.*]] = and i8 0, [[TMP27]]
; CHECK-NEXT:    [[TMP30:%.*]] = or i8 0, [[TMP28]]
; CHECK-NEXT:    [[TMP31:%.*]] = or i8 [[TMP30]], [[TMP29]]
; CHECK-NEXT:    [[RES12:%.*]] = or i8 [[RES3]], [[RES4]]
; CHECK-NEXT:    [[TMP32:%.*]] = xor i8 [[RES11]], -1
; CHECK-NEXT:    [[TMP33:%.*]] = xor i8 [[RES12]], -1
; CHECK-NEXT:    [[TMP34:%.*]] = and i8 [[TMP25]], [[TMP31]]
; CHECK-NEXT:    [[TMP35:%.*]] = and i8 [[TMP32]], [[TMP31]]
; CHECK-NEXT:    [[TMP36:%.*]] = and i8 [[TMP25]], [[TMP33]]
; CHECK-NEXT:    [[TMP37:%.*]] = or i8 [[TMP34]], [[TMP35]]
; CHECK-NEXT:    [[TMP38:%.*]] = or i8 [[TMP37]], [[TMP36]]
; CHECK-NEXT:    [[RES13:%.*]] = or i8 [[RES11]], [[RES12]]
; CHECK-NEXT:    store i8 [[TMP38]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i8 [[RES13]]
;
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)

  %res11 = or i8 %res1, %res2
  %res12 = or i8 %res3, %res4
  %res13 = or i8 %res11, %res12
  ret i8 %res13
}

declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)

; Masked scalar float compare: shadows of %x0, %x1 and %x3 are checked before
; the call; the i8 return shadow is clean.
define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 3, i8 [[X3:%.*]], i32 4)
; CHECK-NEXT:    store i8 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i8 [[RES2]]
;
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
  ret i8 %res2
}


; Four cmp.ss variants combined with `and`: checks shadow propagation through
; the bitwise-and approximation for the i8 results.
define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_ss_all(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES1:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 2, i8 -1, i32 4)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    br i1 [[_MSOR4]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 3, i8 -1, i32 8)
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i128 [[TMP12]], 0
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP13]], 0
; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]]
; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
; CHECK-NEXT:    br i1 [[_MSOR9]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
; CHECK:       14:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       15:
; CHECK-NEXT:    [[RES3:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 4, i8 [[X3:%.*]], i32 4)
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP10:%.*]] = icmp ne i128 [[TMP16]], 0
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i128 [[TMP17]], 0
; CHECK-NEXT:    [[_MSOR12:%.*]] = or i1 [[_MSCMP10]], [[_MSCMP11]]
; CHECK-NEXT:    [[_MSCMP13:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR14:%.*]] = or i1 [[_MSOR12]], [[_MSCMP13]]
; CHECK-NEXT:    br i1 [[_MSOR14]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
; CHECK:       18:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       19:
; CHECK-NEXT:    [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 5, i8 [[X3]], i32 8)
; CHECK-NEXT:    [[TMP20:%.*]] = and i8 [[RES1]], 0
; CHECK-NEXT:    [[TMP21:%.*]] = and i8 0, [[RES2]]
; CHECK-NEXT:    [[TMP22:%.*]] = or i8 0, [[TMP20]]
; CHECK-NEXT:    [[TMP23:%.*]] = or i8 [[TMP22]], [[TMP21]]
; CHECK-NEXT:    [[RES11:%.*]] = and i8 [[RES1]], [[RES2]]
; CHECK-NEXT:    [[TMP24:%.*]] = and i8 [[RES3]], 0
; CHECK-NEXT:    [[TMP25:%.*]] = and i8 0, [[RES4]]
; CHECK-NEXT:    [[TMP26:%.*]] = or i8 0, [[TMP24]]
; CHECK-NEXT:    [[TMP27:%.*]] = or i8 [[TMP26]], [[TMP25]]
; CHECK-NEXT:    [[RES12:%.*]] = and i8 [[RES3]], [[RES4]]
; CHECK-NEXT:    [[TMP28:%.*]] = and i8 [[TMP23]], [[TMP27]]
; CHECK-NEXT:    [[TMP29:%.*]] = and i8 [[RES11]], [[TMP27]]
; CHECK-NEXT:    [[TMP30:%.*]] = and i8 [[TMP23]], [[RES12]]
; CHECK-NEXT:    [[TMP31:%.*]] = or i8 [[TMP28]], [[TMP29]]
; CHECK-NEXT:    [[TMP32:%.*]] = or i8 [[TMP31]], [[TMP30]]
; CHECK-NEXT:    [[RES13:%.*]] = and i8 [[RES11]], [[RES12]]
; CHECK-NEXT:    store i8 [[TMP32]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i8 [[RES13]]
;
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)

  %res11 = and i8 %res1, %res2
  %res12 = and i8 %res3, %res4
  %res13 = and i8 %res11, %res12
  ret i8 %res13
}

declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

; getmant.pd.512, masked (i8 %x3) and unmasked (i8 -1) variants; the unmasked
; call skips the mask-shadow check since no shadow is loaded for constant -1.
define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> [[X0:%.*]], i32 11, <8 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> [[X0]], i32 11, <8 x double> [[X2]], i8 -1, i32 8)
; CHECK-NEXT:    [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES2]]
;
  %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <8 x double> %res, %res1
  ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

; getmant.ps.512, masked (i16 %x3) and unmasked (i16 -1) variants; mirrors the
; pd.512 test with <16 x float> operands and an i16 mask.
define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> [[X0:%.*]], i32 11, <16 x float> [[X2:%.*]], i16 [[X3:%.*]], i32 4)
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
; CHECK-NEXT:    br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> [[X0]], i32 11, <16 x float> [[X2]], i16 -1, i32 8)
; CHECK-NEXT:    [[RES2:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES2]]
;
  %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
  %res2 = fadd <16 x float> %res, %res1
  ret <16 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)

; getmant.sd with four operand combinations: masked, maskz (zero passthru),
; masked+SAE, and unmasked; each call gets its own shadow-check sequence
; tailored to which operands have non-constant shadows.
define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 11, <2 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
; CHECK:       12:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       13:
; CHECK-NEXT:    [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 12, <2 x double> zeroinitializer, i8 [[X3]], i32 4)
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
; CHECK-NEXT:    [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
; CHECK-NEXT:    br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
; CHECK:       17:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       18:
; CHECK-NEXT:    [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 13, <2 x double> [[X2]], i8 [[X3]], i32 8)
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0
; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0
; CHECK-NEXT:    [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP21:%.*]] = icmp ne i128 [[TMP21]], 0
; CHECK-NEXT:    [[_MSOR22:%.*]] = or i1 [[_MSOR20]], [[_MSCMP21]]
; CHECK-NEXT:    br i1 [[_MSOR22]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
; CHECK:       22:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       23:
; CHECK-NEXT:    [[RES3:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 14, <2 x double> [[X2]], i8 -1, i32 4)
; CHECK-NEXT:    [[RES11:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
; CHECK-NEXT:    [[RES12:%.*]] = fadd <2 x double> [[RES2]], [[RES3]]
; CHECK-NEXT:    [[RES13:%.*]] = fadd <2 x double> [[RES11]], [[RES12]]
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES13]]
;
  %res  = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 12, <2 x double> zeroinitializer, i8 %x3, i32 4)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 13, <2 x double> %x2, i8 %x3, i32 8)
  %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 14, <2 x double> %x2, i8 -1, i32 4)
  %res11 = fadd <2 x double> %res, %res1
  %res12 = fadd <2 x double> %res2, %res3
  %res13 = fadd <2 x double> %res11, %res12
  ret <2 x double> %res13
}

declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)

8534define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) #0 {
8535; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ss(
8536; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
8537; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
8538; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
8539; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
8540; CHECK-NEXT:    call void @llvm.donothing()
8541; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
8542; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
8543; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
8544; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
8545; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
8546; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
8547; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
8548; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
8549; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
8550; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
8551; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8552; CHECK:       8:
8553; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
8554; CHECK-NEXT:    unreachable
8555; CHECK:       9:
8556; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 11, <4 x float> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
8557; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
8558; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
8559; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
8560; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
8561; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
8562; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
8563; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
8564; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
8565; CHECK:       12:
8566; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
8567; CHECK-NEXT:    unreachable
8568; CHECK:       13:
8569; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 12, <4 x float> zeroinitializer, i8 [[X3]], i32 4)
8570; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
8571; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
8572; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
8573; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
8574; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
8575; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
8576; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
8577; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
8578; CHECK-NEXT:    br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
8579; CHECK:       17:
8580; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
8581; CHECK-NEXT:    unreachable
8582; CHECK:       18:
8583; CHECK-NEXT:    [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 13, <4 x float> [[X2]], i8 -1, i32 8)
8584; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
8585; CHECK-NEXT:    [[_MSCMP16:%.*]] = icmp ne i128 [[TMP19]], 0
8586; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
8587; CHECK-NEXT:    [[_MSCMP17:%.*]] = icmp ne i128 [[TMP20]], 0
8588; CHECK-NEXT:    [[_MSOR18:%.*]] = or i1 [[_MSCMP16]], [[_MSCMP17]]
8589; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
8590; CHECK-NEXT:    [[_MSCMP19:%.*]] = icmp ne i128 [[TMP21]], 0
8591; CHECK-NEXT:    [[_MSOR20:%.*]] = or i1 [[_MSOR18]], [[_MSCMP19]]
8592; CHECK-NEXT:    br i1 [[_MSOR20]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
8593; CHECK:       22:
8594; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
8595; CHECK-NEXT:    unreachable
8596; CHECK:       23:
8597; CHECK-NEXT:    [[RES3:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 14, <4 x float> [[X2]], i8 -1, i32 4)
8598; CHECK-NEXT:    [[RES11:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
8599; CHECK-NEXT:    [[RES12:%.*]] = fadd <4 x float> [[RES2]], [[RES3]]
8600; CHECK-NEXT:    [[RES13:%.*]] = fadd <4 x float> [[RES11]], [[RES12]]
8601; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
8602; CHECK-NEXT:    ret <4 x float> [[RES13]]
8603;
8604  %res  = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
8605  %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 12, <4 x float> zeroinitializer, i8 %x3, i32 4)
8606  %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 13, <4 x float> %x2, i8 -1, i32 8)
8607  %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 14, <4 x float> %x2, i8 -1, i32 4)
8608  %res11 = fadd <4 x float> %res, %res1
8609  %res12 = fadd <4 x float> %res2, %res3
8610  %res13 = fadd <4 x float> %res11, %res12
8611  ret <4 x float> %res13
8612}
8613
8614define <4 x float> @test_int_x86_avx512_mask_getmant_ss_load(<4 x float> %x0, ptr %x1p, <4 x float> %extra_param) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ss_load(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
; CHECK:       4:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       5:
; CHECK-NEXT:    [[X1:%.*]] = load <4 x float>, ptr [[X1P:%.*]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[X1P]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]]
; CHECK-NEXT:    br i1 [[_MSOR4]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
; CHECK:       12:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       13:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1]], i32 11, <4 x float> [[EXTRA_PARAM:%.*]], i8 -1, i32 4)
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  ; The CHECK lines above verify that MSan first checks the shadow of the
  ; pointer %x1p before the load, then reloads the loaded value's shadow from
  ; shadow memory (address derived by xor-ing with a shadow-base constant) and
  ; OR-folds it with the other operand shadows before the intrinsic call.
  %x1 = load <4 x float>, ptr %x1p
  %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %extra_param, i8 -1, i32 4)
  ret <4 x float> %res
}
8652
8653declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
8654
8655define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES]]
;
  ; Unmasked vpermilvar: MSan collapses both 512-bit operand shadows to i512,
  ; ORs the non-zero checks, and reports via __msan_warning_noreturn; the
  ; return-value shadow is stored as all-zero.
  %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
  ret <8 x double> %res
}
8677
8678define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_mask(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %mask) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> zeroinitializer, <8 x i64> [[TMP4]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64>
; CHECK-NEXT:    [[TMP13:%.*]] = xor <8 x i64> [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP10]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x double> [[RES]], <8 x double> [[X2]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES2]]
;
  ; Merge-masked variant: the select's shadow is propagated per-lane
  ; ([[_MSPROP_SELECT]]), blending the intrinsic-result and passthrough (%x2)
  ; shadows under both the mask value and the mask's own shadow.
  %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> %x2
  ret <8 x double> %res2
}
8714
8715define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_maskz(<8 x double> %x0, <8 x i64> %x1, i8 %mask) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512_maskz(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64>
; CHECK-NEXT:    [[TMP11:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP13]], <8 x i64> [[TMP9]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x double> [[RES]], <8 x double> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES2]]
;
  ; Zero-masked variant: the select's false arm is a clean constant, so its
  ; shadow contribution is zeroinitializer in all the propagation steps above.
  %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> zeroinitializer
  ret <8 x double> %res2
}
8749
8750declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>)
8751
8752define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES]]
;
  ; Unmasked ps variant: mirrors the pd test above but with <16 x i32> shadows,
  ; again collapsing both operand shadows to i512 for the OR-combined check.
  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
  ret <16 x float> %res
}
8774
8775define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP10]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> [[X2]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES2]]
;
  ; Merge-masked ps variant: per-lane select-shadow propagation over an i16
  ; mask, blending result and passthrough (%x2) shadows as in the pd test.
  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
  ret <16 x float> %res2
}
8811
8812define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_maskz(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
; CHECK-NEXT:    [[TMP11:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = or <16 x i32> [[TMP12]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP13]], <16 x i32> [[TMP9]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES2]]
;
  ; Zero-masked ps variant: the clean zeroinitializer false arm makes all of
  ; the or/xor shadow-propagation operands above constant zero.
  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
  ret <16 x float> %res2
}
8846
8847define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES]]
;
  ; Constant index vector: only the %x0 shadow is loaded and checked; the
  ; constant operand needs no shadow check.
  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
  ret <16 x float> %res
}
8865
8866define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP8:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> [[TMP3]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP11:%.*]] = xor <16 x i32> [[TMP9]], [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP13]], <16 x i32> [[TMP8]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> [[X2]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES2]]
;
  ; Constant index + merge mask: only %x0's shadow is checked before the call;
  ; the masked-select shadow blending matches the non-constant mask test above.
  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
  ret <16 x float> %res2
}
8898
8899define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK:       4:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       5:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
; CHECK-NEXT:    [[TMP9:%.*]] = xor <16 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP7]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES2]]
;
  ; Constant index + zero mask: only %x0's shadow is checked; all other
  ; shadow-propagation operands are constant zero.
  %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
  ret <16 x float> %res2
}
8929
8930declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double>, <4 x float>, <2 x double>, i8, i32)
8931
8932define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<2 x double> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ss2sd_round(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> [[X0:%.*]], <4 x float> [[X1:%.*]], <2 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK:       13:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       14:
; CHECK-NEXT:    [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> [[X0]], <4 x float> [[X1]], <2 x double> [[X2]], i8 -1, i32 8)
; CHECK-NEXT:    [[RES2:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES2]]
;
  ; First call checks all four argument shadows (including the i8 mask %x3);
  ; the second call uses the constant mask -1, so only three shadows are
  ; checked before it.
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}
8978
8979declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float>, <2 x double>, <4 x float>, i8, i32)
8980
8981define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_sd2ss_round(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> [[X0:%.*]], <2 x double> [[X1:%.*]], <4 x float> [[X2:%.*]], i8 [[X3:%.*]], i32 11)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK:       13:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       14:
; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> [[X0]], <2 x double> [[X1]], <4 x float> [[X2]], i8 -1, i32 8)
; CHECK-NEXT:    [[RES2:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES2]]
;
  ; Mirror of the ss2sd test in the opposite direction: four shadow checks
  ; before the variable-mask call, three before the constant-mask (-1) call.
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 11)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}
9027
9028declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)
9029
9030define <16 x i32>@test_int_x86_avx512_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_pternlog_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33)
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[TMP9]]
;
  ; Ternary-logic intrinsic: all three 512-bit operand shadows are OR-folded
  ; into a single check; the constant immediate (33) needs no shadow check.
  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
  ret <16 x i32> %1
}
9056
; Merge-masked pternlog.d (select of intrinsic result vs %x0 under i16 mask):
; the three vector operand shadows get the strict check; the mask's shadow
; (TMP4) is bitcast to <16 x i1> and propagated through the select pattern
; (xor/or/_MSPROP_SELECT), so the result shadow depends on mask poisoning too.
9057define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) #0 {
9058; CHECK-LABEL: @test_int_x86_avx512_mask_pternlog_d_512(
9059; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
9060; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
9061; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
9062; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
9063; CHECK-NEXT:    call void @llvm.donothing()
9064; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
9065; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
9066; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
9067; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
9068; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9069; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
9070; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
9071; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
9072; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9073; CHECK:       8:
9074; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9075; CHECK-NEXT:    unreachable
9076; CHECK:       9:
9077; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33)
9078; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
9079; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1>
9080; CHECK-NEXT:    [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> [[TMP1]]
9081; CHECK-NEXT:    [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[X0]]
9082; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
9083; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP1]]
9084; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
9085; CHECK-NEXT:    [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[X0]]
9086; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
9087; CHECK-NEXT:    ret <16 x i32> [[TMP17]]
9088;
9089  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
9090  %2 = bitcast i16 %x4 to <16 x i1>
9091  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
9092  ret <16 x i32> %3
9093}
9094
; Zero-masked pternlog.d (select vs zeroinitializer): same strict operand
; checks as the merge-masked variant, but the select-shadow propagation uses
; zeroinitializer for the false arm since the passthru is a clean constant.
9095define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) #0 {
9096; CHECK-LABEL: @test_int_x86_avx512_maskz_pternlog_d_512(
9097; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
9098; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
9099; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
9100; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
9101; CHECK-NEXT:    call void @llvm.donothing()
9102; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
9103; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
9104; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
9105; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
9106; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9107; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
9108; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
9109; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
9110; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9111; CHECK:       8:
9112; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9113; CHECK-NEXT:    unreachable
9114; CHECK:       9:
9115; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33)
9116; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
9117; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1>
9118; CHECK-NEXT:    [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
9119; CHECK-NEXT:    [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
9120; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
9121; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
9122; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
9123; CHECK-NEXT:    [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
9124; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
9125; CHECK-NEXT:    ret <16 x i32> [[TMP17]]
9126;
9127  %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
9128  %2 = bitcast i16 %x4 to <16 x i1>
9129  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
9130  ret <16 x i32> %3
9131}
9132
9133declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32)
9134
; Unmasked pternlog.q: 64-bit-element analogue of the pternlog.d test above —
; strict OR-of-shadows check on all three <8 x i64> operands, clean retval.
9135define <8 x i64>@test_int_x86_avx512_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
9136; CHECK-LABEL: @test_int_x86_avx512_pternlog_q_512(
9137; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
9138; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
9139; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
9140; CHECK-NEXT:    call void @llvm.donothing()
9141; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
9142; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
9143; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
9144; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
9145; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9146; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
9147; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
9148; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
9149; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
9150; CHECK:       7:
9151; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9152; CHECK-NEXT:    unreachable
9153; CHECK:       8:
9154; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33)
9155; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
9156; CHECK-NEXT:    ret <8 x i64> [[TMP9]]
9157;
9158  %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
9159  ret <8 x i64> %1
9160}
9161
; Merge-masked pternlog.q: strict checks on the three <8 x i64> operands, then
; select-shadow propagation under the i8 mask (mask shadow TMP4 drives the
; _MSPROP_SELECT true arm; false arm merges %x0's shadow TMP1).
9162define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) #0 {
9163; CHECK-LABEL: @test_int_x86_avx512_mask_pternlog_q_512(
9164; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
9165; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
9166; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
9167; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
9168; CHECK-NEXT:    call void @llvm.donothing()
9169; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
9170; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
9171; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
9172; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
9173; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9174; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
9175; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
9176; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
9177; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9178; CHECK:       8:
9179; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9180; CHECK-NEXT:    unreachable
9181; CHECK:       9:
9182; CHECK-NEXT:    [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33)
9183; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
9184; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1>
9185; CHECK-NEXT:    [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> zeroinitializer, <8 x i64> [[TMP1]]
9186; CHECK-NEXT:    [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], [[X0]]
9187; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
9188; CHECK-NEXT:    [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP1]]
9189; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
9190; CHECK-NEXT:    [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> [[X0]]
9191; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
9192; CHECK-NEXT:    ret <8 x i64> [[TMP17]]
9193;
9194  %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
9195  %2 = bitcast i8 %x4 to <8 x i1>
9196  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
9197  ret <8 x i64> %3
9198}
9199
; Zero-masked pternlog.q: as the merge-masked variant, but the passthru is the
; clean constant zeroinitializer, so both arms of the shadow select fold to
; zero-based operands.
9200define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) #0 {
9201; CHECK-LABEL: @test_int_x86_avx512_maskz_pternlog_q_512(
9202; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
9203; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
9204; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
9205; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
9206; CHECK-NEXT:    call void @llvm.donothing()
9207; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
9208; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
9209; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
9210; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
9211; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9212; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
9213; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
9214; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
9215; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9216; CHECK:       8:
9217; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9218; CHECK-NEXT:    unreachable
9219; CHECK:       9:
9220; CHECK-NEXT:    [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33)
9221; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
9222; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1>
9223; CHECK-NEXT:    [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
9224; CHECK-NEXT:    [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer
9225; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
9226; CHECK-NEXT:    [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer
9227; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
9228; CHECK-NEXT:    [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
9229; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
9230; CHECK-NEXT:    ret <8 x i64> [[TMP17]]
9231;
9232  %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
9233  %2 = bitcast i8 %x4 to <8 x i1>
9234  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
9235  ret <8 x i64> %3
9236}
9237
; vcomi.sd with predicate imm 0 (EQ) and SAE rounding (i32 8): both <2 x double>
; operands get a strict shadow check; the i32 result shadow is stored as 0.
9238define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) #0 {
9239; CHECK-LABEL: @test_x86_avx512_comi_sd_eq_sae(
9240; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
9241; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9242; CHECK-NEXT:    call void @llvm.donothing()
9243; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
9244; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
9245; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
9246; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
9247; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9248; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
9249; CHECK:       5:
9250; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9251; CHECK-NEXT:    unreachable
9252; CHECK:       6:
9253; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 0, i32 8)
9254; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
9255; CHECK-NEXT:    ret i32 [[RES]]
9256;
9257  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
9258  ret i32 %res
9259}
9260
; vcomi.sd with predicate imm 8 (unordered EQ) and SAE (i32 8): same strict
; two-operand shadow check and clean i32 result shadow as the comi variant.
9261define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) #0 {
9262; CHECK-LABEL: @test_x86_avx512_ucomi_sd_eq_sae(
9263; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
9264; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9265; CHECK-NEXT:    call void @llvm.donothing()
9266; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
9267; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
9268; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
9269; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
9270; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9271; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
9272; CHECK:       5:
9273; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9274; CHECK-NEXT:    unreachable
9275; CHECK:       6:
9276; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 8, i32 8)
9277; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
9278; CHECK-NEXT:    ret i32 [[RES]]
9279;
9280  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
9281  ret i32 %res
9282}
9283
; vcomi.sd with predicate imm 0 (EQ) and current rounding (i32 4): the MSan
; pattern is identical to the SAE variants — only the immediates differ.
9284define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) #0 {
9285; CHECK-LABEL: @test_x86_avx512_comi_sd_eq(
9286; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
9287; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9288; CHECK-NEXT:    call void @llvm.donothing()
9289; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
9290; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
9291; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
9292; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
9293; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9294; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
9295; CHECK:       5:
9296; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9297; CHECK-NEXT:    unreachable
9298; CHECK:       6:
9299; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 0, i32 4)
9300; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
9301; CHECK-NEXT:    ret i32 [[RES]]
9302;
9303  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
9304  ret i32 %res
9305}
9306
; vcomi.sd with predicate imm 8 and current rounding (i32 4): same strict
; two-operand shadow check, clean i32 result shadow.
9307define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) #0 {
9308; CHECK-LABEL: @test_x86_avx512_ucomi_sd_eq(
9309; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
9310; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9311; CHECK-NEXT:    call void @llvm.donothing()
9312; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
9313; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
9314; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
9315; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
9316; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9317; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
9318; CHECK:       5:
9319; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9320; CHECK-NEXT:    unreachable
9321; CHECK:       6:
9322; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 8, i32 4)
9323; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
9324; CHECK-NEXT:    ret i32 [[RES]]
9325;
9326  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
9327  ret i32 %res
9328}
9329
; vcomi.sd with predicate imm 1 (LT) and SAE (i32 8): same instrumentation
; shape as the EQ tests.
9330define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) #0 {
9331; CHECK-LABEL: @test_x86_avx512_comi_sd_lt_sae(
9332; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
9333; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9334; CHECK-NEXT:    call void @llvm.donothing()
9335; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
9336; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
9337; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
9338; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
9339; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9340; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
9341; CHECK:       5:
9342; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9343; CHECK-NEXT:    unreachable
9344; CHECK:       6:
9345; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 1, i32 8)
9346; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
9347; CHECK-NEXT:    ret i32 [[RES]]
9348;
9349  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
9350  ret i32 %res
9351}
9352
; vcomi.sd with predicate imm 9 and SAE (i32 8): same instrumentation shape.
9353define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) #0 {
9354; CHECK-LABEL: @test_x86_avx512_ucomi_sd_lt_sae(
9355; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
9356; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9357; CHECK-NEXT:    call void @llvm.donothing()
9358; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
9359; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
9360; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
9361; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
9362; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9363; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
9364; CHECK:       5:
9365; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9366; CHECK-NEXT:    unreachable
9367; CHECK:       6:
9368; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 9, i32 8)
9369; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
9370; CHECK-NEXT:    ret i32 [[RES]]
9371;
9372  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
9373  ret i32 %res
9374}
9375
; vcomi.sd with predicate imm 1 (LT) and current rounding (i32 4): same
; instrumentation shape.
9376define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) #0 {
9377; CHECK-LABEL: @test_x86_avx512_comi_sd_lt(
9378; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
9379; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9380; CHECK-NEXT:    call void @llvm.donothing()
9381; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
9382; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
9383; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
9384; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
9385; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9386; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
9387; CHECK:       5:
9388; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9389; CHECK-NEXT:    unreachable
9390; CHECK:       6:
9391; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 1, i32 4)
9392; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
9393; CHECK-NEXT:    ret i32 [[RES]]
9394;
9395  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
9396  ret i32 %res
9397}
9398
; vcomi.sd with predicate imm 9 and current rounding (i32 4): same
; instrumentation shape.
9399define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) #0 {
9400; CHECK-LABEL: @test_x86_avx512_ucomi_sd_lt(
9401; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
9402; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9403; CHECK-NEXT:    call void @llvm.donothing()
9404; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
9405; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
9406; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
9407; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
9408; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9409; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
9410; CHECK:       5:
9411; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9412; CHECK-NEXT:    unreachable
9413; CHECK:       6:
9414; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 9, i32 4)
9415; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
9416; CHECK-NEXT:    ret i32 [[RES]]
9417;
9418  %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
9419  ret i32 %res
9420}
9421
9422declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
9423
; vcomi.ss (single-precision analogue of the vcomi.sd tests): strict shadow
; check on both <4 x float> operands, clean i32 result shadow.
9424define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) #0 {
9425; CHECK-LABEL: @test_x86_avx512_ucomi_ss_lt(
9426; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
9427; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9428; CHECK-NEXT:    call void @llvm.donothing()
9429; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
9430; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
9431; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
9432; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
9433; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9434; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
9435; CHECK:       5:
9436; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9437; CHECK-NEXT:    unreachable
9438; CHECK:       6:
9439; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i32 9, i32 4)
9440; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
9441; CHECK-NEXT:    ret i32 [[RES]]
9442;
9443  %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
9444  ret i32 %res
9445}
9446
9447declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
9448
9449declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
9450
; Unmasked permvar.df.512: strict shadow check on both operands (data and
; index vector), then the intrinsic is called and the retval shadow cleared.
9451define <8 x double>@test_int_x86_avx512_permvar_df_512(<8 x double> %x0, <8 x i64> %x1) #0 {
9452; CHECK-LABEL: @test_int_x86_avx512_permvar_df_512(
9453; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
9454; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
9455; CHECK-NEXT:    call void @llvm.donothing()
9456; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
9457; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
9458; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
9459; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
9460; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9461; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
9462; CHECK:       5:
9463; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9464; CHECK-NEXT:    unreachable
9465; CHECK:       6:
9466; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
9467; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
9468; CHECK-NEXT:    ret <8 x double> [[TMP7]]
9469;
9470  %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
9471  ret <8 x double> %1
9472}
9473
; Merge-masked permvar.df.512: strict check covers the two permvar operands;
; the floating-point select shadow is computed by bitcasting both select arms
; to <8 x i64> before the xor/or merge, with the passthru shadow TMP4 in the
; false arm, and routed through _MSPROP_SELECT under the mask's shadow TMP3.
9474define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 {
9475; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_df_512(
9476; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
9477; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
9478; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
9479; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
9480; CHECK-NEXT:    call void @llvm.donothing()
9481; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
9482; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
9483; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
9484; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
9485; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9486; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
9487; CHECK:       7:
9488; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9489; CHECK-NEXT:    unreachable
9490; CHECK:       8:
9491; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
9492; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
9493; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
9494; CHECK-NEXT:    [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> zeroinitializer, <8 x i64> [[TMP4]]
9495; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64>
9496; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64>
9497; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i64> [[TMP13]], [[TMP14]]
9498; CHECK-NEXT:    [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer
9499; CHECK-NEXT:    [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP4]]
9500; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP17]], <8 x i64> [[TMP12]]
9501; CHECK-NEXT:    [[TMP18:%.*]] = select <8 x i1> [[TMP11]], <8 x double> [[TMP9]], <8 x double> [[X2]]
9502; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
9503; CHECK-NEXT:    ret <8 x double> [[TMP18]]
9504;
9505  %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
9506  %2 = bitcast i8 %x3 to <8 x i1>
9507  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x2
9508  ret <8 x double> %3
9509}
9510
; Zero-masked permvar.df.512: as the merge-masked variant but with a clean
; zeroinitializer passthru, so the select-shadow arms reduce to zero operands.
9511define <8 x double>@test_int_x86_avx512_maskz_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, i8 %x3) #0 {
9512; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_df_512(
9513; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
9514; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
9515; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
9516; CHECK-NEXT:    call void @llvm.donothing()
9517; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
9518; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
9519; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
9520; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
9521; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9522; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
9523; CHECK:       6:
9524; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9525; CHECK-NEXT:    unreachable
9526; CHECK:       7:
9527; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
9528; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
9529; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
9530; CHECK-NEXT:    [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
9531; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
9532; CHECK-NEXT:    [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
9533; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
9534; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
9535; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
9536; CHECK-NEXT:    [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
9537; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
9538; CHECK-NEXT:    ret <8 x double> [[TMP16]]
9539;
9540  %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
9541  %2 = bitcast i8 %x3 to <8 x i1>
9542  %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
9543  ret <8 x double> %3
9544}
9545
9546declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
9547
; Unmasked integer permvar: shadow is propagated bitwise (_MSPROP = or of the
; two operand shadows) rather than strict-checked, unlike the FP permvar tests
; in this file which branch to __msan_warning_noreturn.
define <8 x i64>@test_int_x86_avx512_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1) #0 {
; CHECK-LABEL: @test_int_x86_avx512_permvar_di_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
; CHECK-NEXT:    store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
;
  %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
  ret <8 x i64> %1
}
9561
; Merge-masking integer permvar: the select's shadow combines the propagated
; operand shadow (_MSPROP), the passthru shadow (TMP4), and the xor of result
; vs. passthru values, gated by the mask shadow (TMP6) and the mask (TMP7).
define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_di_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]]
; CHECK-NEXT:    [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X2:%.*]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]]
; CHECK-NEXT:    [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
; CHECK-NEXT:    [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X2]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[TMP12]]
;
  %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
  ret <8 x i64> %3
}
9587
; Zero-masking integer permvar: same select-shadow construction as the merge
; form above, but with zeroinitializer standing in for the passthru value and
; its shadow.
define <8 x i64>@test_int_x86_avx512_maskz_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_di_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
; CHECK-NEXT:    [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[TMP11]]
;
  %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
  %2 = bitcast i8 %x3 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
  ret <8 x i64> %3
}
9612
9613declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>)
9614
; Unmasked FP permvar (<16 x float>): operand shadows are strict-checked via
; i512 compares and the return shadow is reported as clean (zeroinitializer
; stored to __msan_retval_tls).
define <16 x float>@test_int_x86_avx512_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1) #0 {
; CHECK-LABEL: @test_int_x86_avx512_permvar_sf_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP7]]
;
  %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
  ret <16 x float> %1
}
9637
; Merge-masking FP permvar: operands are strict-checked before the intrinsic;
; the result shadow then mixes the passthru shadow (TMP4) with the xor of the
; result and passthru bit patterns, selected by the mask shadow (TMP3).
define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_sf_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
; CHECK:       7:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       8:
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT:    [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[X2]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP18]]
;
  %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %x2
  ret <16 x float> %3
}
9674
; Zero-masking FP permvar (<16 x float>): strict-check of both operand shadows,
; then the select shadow is built against zeroinitializer passthru, gated by the
; mask shadow (TMP3).
define <16 x float>@test_int_x86_avx512_maskz_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, i16 %x3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_sf_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP16]]
;
  %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
  ret <16 x float> %3
}
9709
9710declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
9711
; Unmasked integer permvar (<16 x i32>): bitwise shadow propagation only
; (_MSPROP = or of both operand shadows), no strict check or warning branch.
define <16 x i32>@test_int_x86_avx512_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1) #0 {
; CHECK-LABEL: @test_int_x86_avx512_permvar_si_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
; CHECK-NEXT:    store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[TMP3]]
;
  %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
  ret <16 x i32> %1
}
9725
; Merge-masking integer permvar (<16 x i32>): select shadow combines _MSPROP,
; the passthru shadow (TMP4), and xor of result vs. passthru, under the mask
; shadow (TMP3 bitcast to <16 x i1>).
define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_si_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]]
; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[TMP12]]
;
  %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  ret <16 x i32> %3
}
9751
; Zero-masking integer permvar (<16 x i32>): as the merge form, with
; zeroinitializer substituted for the passthru value and its shadow.
define <16 x i32>@test_int_x86_avx512_maskz_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_si_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[TMP11]]
;
  %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
  ret <16 x i32> %3
}
9776
9777declare <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)
9778
; fixupimm.pd (merge form), three calls with different immediates/masks: before
; each call the instrumentation strict-checks the shadows of exactly the
; operands that are non-constant in that call (e.g. the zeroinitializer x0 in
; the second call and the constant -1 mask in the third are skipped); the
; intrinsic's return shadow is reported clean (zeroinitializer to retval TLS).
define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 4, i8 [[X4:%.*]], i32 4)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
; CHECK:       12:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       13:
; CHECK-NEXT:    [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> [[X1]], <8 x i64> [[X2]], i32 5, i8 [[X4]], i32 4)
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
; CHECK-NEXT:    br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
; CHECK:       17:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       18:
; CHECK-NEXT:    [[RES2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> [[X2]], i32 3, i8 -1, i32 8)
; CHECK-NEXT:    [[RES3:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
; CHECK-NEXT:    [[RES4:%.*]] = fadd <8 x double> [[RES3]], [[RES2]]
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES4]]
;
  %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 8)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}
9841
; fixupimm.pd with a memory operand: the pointer's shadow (TMP1) is checked
; before the dereference; the loaded value's shadow (_MSLD) comes from the
; shadow address formed by xor-ing the pointer with the x86_64 shadow offset
; 87960930222080 (0x500000000000), and is then strict-checked with the other
; operand shadows before the intrinsic call.
define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512_load(<8 x double> %x0, <8 x double> %x1, ptr %x2ptr) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_pd_512_load(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK:       4:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       5:
; CHECK-NEXT:    [[X2:%.*]] = load <8 x i64>, ptr [[X2PTR:%.*]], align 64
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[X2PTR]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP8]], align 64
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i64> [[_MSLD]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]]
; CHECK-NEXT:    br i1 [[_MSOR4]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
; CHECK:       12:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       13:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2]], i32 3, i8 -1, i32 4)
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES]]
;
  %x2 = load <8 x i64>, ptr %x2ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 4)
  ret <8 x double> %res
}
9880
9881declare <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)
9882
; fixupimm.pd (zero-masking form): mirrors the merge-form test above — each of
; the three calls is preceded by strict checks on the shadows of its
; non-constant operands only (the zeroinitializer x2 in the second call and the
; i8 -1 mask in the third need no check); return shadow is reported clean.
define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) #0 {
; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_pd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 3, i8 [[X4:%.*]], i32 4)
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
; CHECK:       12:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       13:
; CHECK-NEXT:    [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 4)
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
; CHECK-NEXT:    br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
; CHECK:       17:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       18:
; CHECK-NEXT:    [[RES2:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> [[X2]], i32 2, i8 -1, i32 8)
; CHECK-NEXT:    [[RES3:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
; CHECK-NEXT:    [[RES4:%.*]] = fadd <8 x double> [[RES3]], [[RES2]]
; CHECK-NEXT:    store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES4]]
;
  %res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4)
  %res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4)
  %res2 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 2, i8 -1, i32 8)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}
9945
9946declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32)
9947
9948define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) #0 {
9949; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ss(
9950; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
9951; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9952; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
9953; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
9954; CHECK-NEXT:    call void @llvm.donothing()
9955; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
9956; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
9957; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
9958; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
9959; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
9960; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
9961; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
9962; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
9963; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
9964; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
9965; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9966; CHECK:       8:
9967; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9968; CHECK-NEXT:    unreachable
9969; CHECK:       9:
9970; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], <4 x i32> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
9971; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
9972; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
9973; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
9974; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
9975; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
9976; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
9977; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
9978; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
9979; CHECK:       12:
9980; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9981; CHECK-NEXT:    unreachable
9982; CHECK:       13:
9983; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> zeroinitializer, i32 5, i8 [[X4]], i32 4)
9984; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
9985; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
9986; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
9987; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
9988; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
9989; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
9990; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
9991; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
9992; CHECK-NEXT:    br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
9993; CHECK:       17:
9994; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
9995; CHECK-NEXT:    unreachable
9996; CHECK:       18:
9997; CHECK-NEXT:    [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> [[X2]], i32 5, i8 -1, i32 8)
9998; CHECK-NEXT:    [[RES3:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
9999; CHECK-NEXT:    [[RES4:%.*]] = fadd <4 x float> [[RES3]], [[RES2]]
10000; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
10001; CHECK-NEXT:    ret <4 x float> [[RES4]]
10002;
10003  %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
10004  %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4)
10005  %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 8)
10006  %res3 = fadd <4 x float> %res, %res1
10007  %res4 = fadd <4 x float> %res3, %res2
10008  ret <4 x float> %res4
10009}
10010
10011declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32)
10012
10013define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) #0 {
10014; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_ss(
10015; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
10016; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
10017; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
10018; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
10019; CHECK-NEXT:    call void @llvm.donothing()
10020; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
10021; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
10022; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
10023; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
10024; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
10025; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
10026; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
10027; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
10028; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
10029; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
10030; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
10031; CHECK:       8:
10032; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10033; CHECK-NEXT:    unreachable
10034; CHECK:       9:
10035; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], <4 x i32> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
10036; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
10037; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
10038; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
10039; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
10040; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
10041; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
10042; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
10043; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
10044; CHECK:       12:
10045; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10046; CHECK-NEXT:    unreachable
10047; CHECK:       13:
10048; CHECK-NEXT:    [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> zeroinitializer, i32 5, i8 [[X4]], i32 8)
10049; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
10050; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
10051; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
10052; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
10053; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
10054; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
10055; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
10056; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
10057; CHECK-NEXT:    br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
10058; CHECK:       17:
10059; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10060; CHECK-NEXT:    unreachable
10061; CHECK:       18:
10062; CHECK-NEXT:    [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> [[X2]], i32 6, i8 -1, i32 4)
10063; CHECK-NEXT:    [[RES3:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
10064; CHECK-NEXT:    [[RES4:%.*]] = fadd <4 x float> [[RES3]], [[RES2]]
10065; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
10066; CHECK-NEXT:    ret <4 x float> [[RES4]]
10067;
10068  %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
10069  %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8)
10070  %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 6, i8 -1, i32 4)
10071  %res3 = fadd <4 x float> %res, %res1
10072  %res4 = fadd <4 x float> %res3, %res2
10073  ret <4 x float> %res4
10074}
10075
10076declare <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32)
10077
10078define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) #0 {
10079; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ps_512(
10080; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
10081; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
10082; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
10083; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
10084; CHECK-NEXT:    call void @llvm.donothing()
10085; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
10086; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
10087; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
10088; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
10089; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
10090; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
10091; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
10092; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
10093; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0
10094; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
10095; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
10096; CHECK:       8:
10097; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10098; CHECK-NEXT:    unreachable
10099; CHECK:       9:
10100; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 5, i16 [[X4:%.*]], i32 4)
10101; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
10102; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
10103; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
10104; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
10105; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
10106; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i16 [[TMP4]], 0
10107; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
10108; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
10109; CHECK:       12:
10110; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10111; CHECK-NEXT:    unreachable
10112; CHECK:       13:
10113; CHECK-NEXT:    [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> zeroinitializer, i32 5, i16 [[X4]], i32 4)
10114; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
10115; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
10116; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
10117; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
10118; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
10119; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
10120; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
10121; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
10122; CHECK-NEXT:    br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
10123; CHECK:       17:
10124; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10125; CHECK-NEXT:    unreachable
10126; CHECK:       18:
10127; CHECK-NEXT:    [[RES2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> [[X2]], i32 5, i16 -1, i32 8)
10128; CHECK-NEXT:    [[RES3:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
10129; CHECK-NEXT:    [[RES4:%.*]] = fadd <16 x float> [[RES3]], [[RES2]]
10130; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
10131; CHECK-NEXT:    ret <16 x float> [[RES4]]
10132;
10133  %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
10134  %res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4)
10135  %res2 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 8)
10136  %res3 = fadd <16 x float> %res, %res1
10137  %res4 = fadd <16 x float> %res3, %res2
10138  ret <16 x float> %res4
10139}
10140
10141define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512_load(<16 x float> %x0, <16 x float> %x1, ptr %x2ptr) #0 {
10142; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ps_512_load(
10143; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
10144; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
10145; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
10146; CHECK-NEXT:    call void @llvm.donothing()
10147; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
10148; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
10149; CHECK:       4:
10150; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10151; CHECK-NEXT:    unreachable
10152; CHECK:       5:
10153; CHECK-NEXT:    [[X2:%.*]] = load <16 x i32>, ptr [[X2PTR:%.*]], align 64
10154; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[X2PTR]] to i64
10155; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
10156; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
10157; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64
10158; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
10159; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
10160; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
10161; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
10162; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
10163; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
10164; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
10165; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]]
10166; CHECK-NEXT:    br i1 [[_MSOR4]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
10167; CHECK:       12:
10168; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10169; CHECK-NEXT:    unreachable
10170; CHECK:       13:
10171; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2]], i32 5, i16 -1, i32 4)
10172; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
10173; CHECK-NEXT:    ret <16 x float> [[RES]]
10174;
10175  %x2 = load <16 x i32>, ptr %x2ptr
10176  %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4)
10177  ret <16 x float> %res
10178}
10179
10180declare <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32)
10181
10182define <16 x float>@test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) #0 {
10183; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_ps_512(
10184; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
10185; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
10186; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
10187; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
10188; CHECK-NEXT:    call void @llvm.donothing()
10189; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
10190; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
10191; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
10192; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
10193; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
10194; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
10195; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
10196; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
10197; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0
10198; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
10199; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
10200; CHECK:       8:
10201; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10202; CHECK-NEXT:    unreachable
10203; CHECK:       9:
10204; CHECK-NEXT:    [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 5, i16 [[X4:%.*]], i32 4)
10205; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
10206; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
10207; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
10208; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
10209; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
10210; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i16 [[TMP4]], 0
10211; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
10212; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
10213; CHECK:       12:
10214; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10215; CHECK-NEXT:    unreachable
10216; CHECK:       13:
10217; CHECK-NEXT:    [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> zeroinitializer, i32 6, i16 [[X4]], i32 8)
10218; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
10219; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
10220; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
10221; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
10222; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
10223; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
10224; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
10225; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
10226; CHECK-NEXT:    br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
10227; CHECK:       17:
10228; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10229; CHECK-NEXT:    unreachable
10230; CHECK:       18:
10231; CHECK-NEXT:    [[RES2:%.*]] = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> [[X2]], i32 7, i16 -1, i32 4)
10232; CHECK-NEXT:    [[RES3:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
10233; CHECK-NEXT:    [[RES4:%.*]] = fadd <16 x float> [[RES3]], [[RES2]]
10234; CHECK-NEXT:    store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
10235; CHECK-NEXT:    ret <16 x float> [[RES4]]
10236;
10237  %res = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
10238  %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 6, i16 %x4, i32 8)
10239  %res2 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 7, i16 -1, i32 4)
10240  %res3 = fadd <16 x float> %res, %res1
10241  %res4 = fadd <16 x float> %res3, %res2
10242  ret <16 x float> %res4
10243}
10244
10245declare <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32)
10246
10247define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) #0 {
10248; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_sd(
10249; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
10250; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
10251; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
10252; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
10253; CHECK-NEXT:    call void @llvm.donothing()
10254; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
10255; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
10256; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
10257; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
10258; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
10259; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
10260; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
10261; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
10262; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
10263; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
10264; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
10265; CHECK:       8:
10266; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10267; CHECK-NEXT:    unreachable
10268; CHECK:       9:
10269; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], <2 x i64> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
10270; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
10271; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
10272; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
10273; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
10274; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
10275; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
10276; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
10277; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
10278; CHECK:       12:
10279; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10280; CHECK-NEXT:    unreachable
10281; CHECK:       13:
10282; CHECK-NEXT:    [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 8)
10283; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
10284; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
10285; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
10286; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
10287; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
10288; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
10289; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
10290; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
10291; CHECK-NEXT:    br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
10292; CHECK:       17:
10293; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10294; CHECK-NEXT:    unreachable
10295; CHECK:       18:
10296; CHECK-NEXT:    [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> [[X2]], i32 6, i8 -1, i32 4)
10297; CHECK-NEXT:    [[RES3:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
10298; CHECK-NEXT:    [[RES4:%.*]] = fadd <2 x double> [[RES3]], [[RES2]]
10299; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
10300; CHECK-NEXT:    ret <2 x double> [[RES4]]
10301;
10302  %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
10303  %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
10304  %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 6, i8 -1, i32 4)
10305  %res3 = fadd <2 x double> %res, %res1
10306  %res4 = fadd <2 x double> %res3, %res2
10307  ret <2 x double> %res4
10308}
10309
10310declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32)
10311
10312define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) #0 {
10313; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_sd(
10314; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
10315; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
10316; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
10317; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
10318; CHECK-NEXT:    call void @llvm.donothing()
10319; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
10320; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
10321; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
10322; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
10323; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
10324; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
10325; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
10326; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
10327; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
10328; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
10329; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
10330; CHECK:       8:
10331; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10332; CHECK-NEXT:    unreachable
10333; CHECK:       9:
10334; CHECK-NEXT:    [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], <2 x i64> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
10335; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
10336; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
10337; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
10338; CHECK-NEXT:    [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
10339; CHECK-NEXT:    [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
10340; CHECK-NEXT:    [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
10341; CHECK-NEXT:    [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
10342; CHECK-NEXT:    br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
10343; CHECK:       12:
10344; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10345; CHECK-NEXT:    unreachable
10346; CHECK:       13:
10347; CHECK-NEXT:    [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 8)
10348; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
10349; CHECK-NEXT:    [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
10350; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
10351; CHECK-NEXT:    [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
10352; CHECK-NEXT:    [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
10353; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
10354; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
10355; CHECK-NEXT:    [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
10356; CHECK-NEXT:    [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
10357; CHECK-NEXT:    [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
10358; CHECK-NEXT:    br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
10359; CHECK:       17:
10360; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10361; CHECK-NEXT:    unreachable
10362; CHECK:       18:
10363; CHECK-NEXT:    [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> [[X2]], i32 5, i8 [[X4]], i32 8)
10364; CHECK-NEXT:    [[RES3:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
10365; CHECK-NEXT:    [[RES4:%.*]] = fadd <2 x double> [[RES3]], [[RES2]]
10366; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
10367; CHECK-NEXT:    ret <2 x double> [[RES4]]
10368;
10369  %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
10370  %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
10371  %res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 8)
10372  %res3 = fadd <2 x double> %res, %res1
10373  %res4 = fadd <2 x double> %res3, %res2
10374  ret <2 x double> %res4
10375}
10376
10377declare double @llvm.fma.f64(double, double, double) #1
10378declare double @llvm.x86.avx512.vfmadd.f64(double, double, double, i32) #0
10379
10380define <2 x double> @test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
10381; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_sd(
10382; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
10383; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
10384; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
10385; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
10386; CHECK-NEXT:    call void @llvm.donothing()
10387; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
10388; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
10389; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
10390; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
10391; CHECK-NEXT:    [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
10392; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
10393; CHECK-NEXT:    [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
10394; CHECK-NEXT:    [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
10395; CHECK-NEXT:    [[TMP8:%.*]] = call double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP7]])
10396; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
10397; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
10398; CHECK-NEXT:    [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
10399; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
10400; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[_MSPROP4]], i64 [[_MSPROP]]
10401; CHECK-NEXT:    [[TMP13:%.*]] = bitcast double [[TMP8]] to i64
10402; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double [[TMP5]] to i64
10403; CHECK-NEXT:    [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]]
10404; CHECK-NEXT:    [[TMP16:%.*]] = or i64 [[TMP15]], [[_MSPROP4]]
10405; CHECK-NEXT:    [[TMP17:%.*]] = or i64 [[TMP16]], [[_MSPROP]]
10406; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i64 [[TMP17]], i64 [[TMP12]]
10407; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP11]], double [[TMP8]], double [[TMP5]]
10408; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT]], i64 0
10409; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x double> [[X0]], double [[TMP18]], i64 0
10410; CHECK-NEXT:    [[_MSPROP7:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
10411; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[X0]], i64 0
10412; CHECK-NEXT:    [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
10413; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[X1]], i64 0
10414; CHECK-NEXT:    [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
10415; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[X2]], i64 0
10416; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP7]], 0
10417; CHECK-NEXT:    [[_MSCMP19:%.*]] = icmp ne i64 [[_MSPROP8]], 0
10418; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
10419; CHECK-NEXT:    [[_MSCMP20:%.*]] = icmp ne i64 [[_MSPROP9]], 0
10420; CHECK-NEXT:    [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
10421; CHECK-NEXT:    br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
10422; CHECK:       23:
10423; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10424; CHECK-NEXT:    unreachable
10425; CHECK:       24:
10426; CHECK-NEXT:    [[TMP25:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP20]], double [[TMP21]], double [[TMP22]], i32 11)
10427; CHECK-NEXT:    [[_MSPROP10:%.*]] = insertelement <2 x i64> [[TMP1]], i64 0, i64 0
10428; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <2 x double> [[X0]], double [[TMP25]], i64 0
10429; CHECK-NEXT:    [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
10430; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <2 x double> [[X0]], i64 0
10431; CHECK-NEXT:    [[_MSPROP12:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
10432; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x double> [[X1]], i64 0
10433; CHECK-NEXT:    [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
10434; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0
10435; CHECK-NEXT:    [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP11]], 0
10436; CHECK-NEXT:    [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP12]], 0
10437; CHECK-NEXT:    [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
10438; CHECK-NEXT:    [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
10439; CHECK-NEXT:    [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
10440; CHECK-NEXT:    br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
10441; CHECK:       30:
10442; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10443; CHECK-NEXT:    unreachable
10444; CHECK:       31:
10445; CHECK-NEXT:    [[TMP32:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP27]], double [[TMP28]], double [[TMP29]], i32 10)
10446; CHECK-NEXT:    [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
10447; CHECK-NEXT:    [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
10448; CHECK-NEXT:    [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
10449; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
10450; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], i64 0, i64 [[_MSPROP11]]
10451; CHECK-NEXT:    [[TMP37:%.*]] = bitcast double [[TMP32]] to i64
10452; CHECK-NEXT:    [[TMP38:%.*]] = bitcast double [[TMP27]] to i64
10453; CHECK-NEXT:    [[TMP39:%.*]] = xor i64 [[TMP37]], [[TMP38]]
10454; CHECK-NEXT:    [[TMP40:%.*]] = or i64 [[TMP39]], 0
10455; CHECK-NEXT:    [[TMP41:%.*]] = or i64 [[TMP40]], [[_MSPROP11]]
10456; CHECK-NEXT:    [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i64 [[TMP41]], i64 [[TMP36]]
10457; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP35]], double [[TMP32]], double [[TMP27]]
10458; CHECK-NEXT:    [[_MSPROP16:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT15]], i64 0
10459; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <2 x double> [[X0]], double [[TMP42]], i64 0
10460; CHECK-NEXT:    [[_MSPROP17:%.*]] = or <2 x i64> [[_MSPROP6]], [[_MSPROP10]]
10461; CHECK-NEXT:    [[RES3:%.*]] = fadd <2 x double> [[TMP19]], [[TMP26]]
10462; CHECK-NEXT:    [[_MSPROP18:%.*]] = or <2 x i64> [[_MSPROP16]], [[_MSPROP17]]
10463; CHECK-NEXT:    [[RES4:%.*]] = fadd <2 x double> [[TMP43]], [[RES3]]
10464; CHECK-NEXT:    store <2 x i64> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
10465; CHECK-NEXT:    ret <2 x double> [[RES4]]
10466;
10467  %1 = extractelement <2 x double> %x0, i64 0
10468  %2 = extractelement <2 x double> %x1, i64 0
10469  %3 = extractelement <2 x double> %x2, i64 0
10470  %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
10471  %5 = bitcast i8 %x3 to <8 x i1>
10472  %6 = extractelement <8 x i1> %5, i64 0
10473  %7 = select i1 %6, double %4, double %1
10474  %8 = insertelement <2 x double> %x0, double %7, i64 0
10475  %9 = extractelement <2 x double> %x0, i64 0
10476  %10 = extractelement <2 x double> %x1, i64 0
10477  %11 = extractelement <2 x double> %x2, i64 0
10478  %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11)
10479  %13 = insertelement <2 x double> %x0, double %12, i64 0
10480  %14 = extractelement <2 x double> %x0, i64 0
10481  %15 = extractelement <2 x double> %x1, i64 0
10482  %16 = extractelement <2 x double> %x2, i64 0
10483  %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 10)
10484  %18 = bitcast i8 %x3 to <8 x i1>
10485  %19 = extractelement <8 x i1> %18, i64 0
10486  %20 = select i1 %19, double %17, double %14
10487  %21 = insertelement <2 x double> %x0, double %20, i64 0
10488  %res3 = fadd <2 x double> %8, %13
10489  %res4 = fadd <2 x double> %21, %res3
10490  ret <2 x double> %res4
10491}
10492
; Merge-masked scalar float FMA (vfmadd.ss), three rounding variants summed:
; the default-rounding llvm.fma.f32 plus two llvm.x86.avx512.vfmadd.f32 calls
; with explicit rounding operands (i32 11, i32 10). The expected MSan output
; shows exact shadow propagation for the plain fma (lane-0 shadows OR-ed, then
; blended through the lane-0 mask bit), while for the rounded intrinsics the
; operand shadows are instead checked and a non-zero shadow branches to
; __msan_warning_noreturn; their result shadow lane is set to 0.
define <4 x float> @test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
; CHECK-NEXT:    [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
; CHECK-NEXT:    [[TMP8:%.*]] = call float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
; CHECK-NEXT:    [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[_MSPROP4]], i32 [[_MSPROP]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast float [[TMP8]] to i32
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast float [[TMP5]] to i32
; CHECK-NEXT:    [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP4]]
; CHECK-NEXT:    [[TMP17:%.*]] = or i32 [[TMP16]], [[_MSPROP]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i32 [[TMP17]], i32 [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP11]], float [[TMP8]], float [[TMP5]]
; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT]], i64 0
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x float> [[X0]], float [[TMP18]], i64 0
; CHECK-NEXT:    [[_MSPROP7:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x float> [[X0]], i64 0
; CHECK-NEXT:    [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x float> [[X1]], i64 0
; CHECK-NEXT:    [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x float> [[X2]], i64 0
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP7]], 0
; CHECK-NEXT:    [[_MSCMP19:%.*]] = icmp ne i32 [[_MSPROP8]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
; CHECK-NEXT:    [[_MSCMP20:%.*]] = icmp ne i32 [[_MSPROP9]], 0
; CHECK-NEXT:    [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
; CHECK-NEXT:    br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
; CHECK:       23:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       24:
; CHECK-NEXT:    [[TMP25:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP20]], float [[TMP21]], float [[TMP22]], i32 11)
; CHECK-NEXT:    [[_MSPROP10:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i64 0
; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x float> [[X0]], float [[TMP25]], i64 0
; CHECK-NEXT:    [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x float> [[X0]], i64 0
; CHECK-NEXT:    [[_MSPROP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x float> [[X1]], i64 0
; CHECK-NEXT:    [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0
; CHECK-NEXT:    [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP11]], 0
; CHECK-NEXT:    [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP12]], 0
; CHECK-NEXT:    [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
; CHECK-NEXT:    [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
; CHECK-NEXT:    [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
; CHECK-NEXT:    br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
; CHECK:       30:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       31:
; CHECK-NEXT:    [[TMP32:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP27]], float [[TMP28]], float [[TMP29]], i32 10)
; CHECK-NEXT:    [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT:    [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT:    [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], i32 0, i32 [[_MSPROP11]]
; CHECK-NEXT:    [[TMP37:%.*]] = bitcast float [[TMP32]] to i32
; CHECK-NEXT:    [[TMP38:%.*]] = bitcast float [[TMP27]] to i32
; CHECK-NEXT:    [[TMP39:%.*]] = xor i32 [[TMP37]], [[TMP38]]
; CHECK-NEXT:    [[TMP40:%.*]] = or i32 [[TMP39]], 0
; CHECK-NEXT:    [[TMP41:%.*]] = or i32 [[TMP40]], [[_MSPROP11]]
; CHECK-NEXT:    [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP41]], i32 [[TMP36]]
; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP35]], float [[TMP32]], float [[TMP27]]
; CHECK-NEXT:    [[_MSPROP16:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT15]], i64 0
; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <4 x float> [[X0]], float [[TMP42]], i64 0
; CHECK-NEXT:    [[_MSPROP17:%.*]] = or <4 x i32> [[_MSPROP6]], [[_MSPROP10]]
; CHECK-NEXT:    [[RES3:%.*]] = fadd <4 x float> [[TMP19]], [[TMP26]]
; CHECK-NEXT:    [[_MSPROP18:%.*]] = or <4 x i32> [[_MSPROP16]], [[_MSPROP17]]
; CHECK-NEXT:    [[RES4:%.*]] = fadd <4 x float> [[TMP43]], [[RES3]]
; CHECK-NEXT:    store <4 x i32> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES4]]
;
  ; Variant 1: default-rounding fma, lane 0 merge-masked into %x0.
  %1 = extractelement <4 x float> %x0, i64 0
  %2 = extractelement <4 x float> %x1, i64 0
  %3 = extractelement <4 x float> %x2, i64 0
  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, float %4, float %1
  %8 = insertelement <4 x float> %x0, float %7, i64 0
  ; Variant 2: explicit rounding (i32 11), unmasked result.
  %9 = extractelement <4 x float> %x0, i64 0
  %10 = extractelement <4 x float> %x1, i64 0
  %11 = extractelement <4 x float> %x2, i64 0
  %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11)
  %13 = insertelement <4 x float> %x0, float %12, i64 0
  ; Variant 3: explicit rounding (i32 10), merge-masked into %x0.
  %14 = extractelement <4 x float> %x0, i64 0
  %15 = extractelement <4 x float> %x1, i64 0
  %16 = extractelement <4 x float> %x2, i64 0
  %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 10)
  %18 = bitcast i8 %x3 to <8 x i1>
  %19 = extractelement <8 x i1> %18, i64 0
  %20 = select i1 %19, float %17, float %14
  %21 = insertelement <4 x float> %x0, float %20, i64 0
  %res3 = fadd <4 x float> %8, %13
  %res4 = fadd <4 x float> %21, %res3
  ret <4 x float> %res4
}
10605
; Zero-masked scalar double FMA (vfmadd.sd).
; NOTE(review): unlike the #0-attributed functions in this file, this define
; carries no attribute group, so the expected output contains no
; @__msan_param_tls shadow loads: all shadows fold to constant zero (note the
; `select i1 false` and the `store <2 x i64> zeroinitializer` retval shadow) —
; presumably #0 carries sanitize_memory; confirm against the attribute group
; definition at the end of the file.
define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_sd(
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i64 0, i64 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast double [[TMP4]] to i64
; CHECK-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 0
; CHECK-NEXT:    [[TMP10:%.*]] = or i64 [[TMP9]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP10]], 0
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 false, i64 [[TMP11]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00
; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x double> [[X0]], double [[TMP12]], i64 0
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[X0]], i64 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x double> [[X1]], i64 0
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[X2]], i64 0
; CHECK-NEXT:    [[TMP17:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]], i32 11)
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i64 0, i64 0
; CHECK-NEXT:    [[TMP21:%.*]] = bitcast double [[TMP17]] to i64
; CHECK-NEXT:    [[TMP22:%.*]] = xor i64 [[TMP21]], 0
; CHECK-NEXT:    [[TMP23:%.*]] = or i64 [[TMP22]], 0
; CHECK-NEXT:    [[TMP24:%.*]] = or i64 [[TMP23]], 0
; CHECK-NEXT:    [[_MSPROP_SELECT1:%.*]] = select i1 false, i64 [[TMP24]], i64 [[TMP20]]
; CHECK-NEXT:    [[TMP25:%.*]] = select i1 [[TMP19]], double [[TMP17]], double 0.000000e+00
; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <2 x double> [[X0]], double [[TMP25]], i64 0
; CHECK-NEXT:    [[RES2:%.*]] = fadd <2 x double> [[TMP13]], [[TMP26]]
; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES2]]
;
  ; Zero-masking: a clear mask bit selects 0.0 rather than a passthru operand.
  %1 = extractelement <2 x double> %x0, i64 0
  %2 = extractelement <2 x double> %x1, i64 0
  %3 = extractelement <2 x double> %x2, i64 0
  %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double 0.000000e+00
  %8 = insertelement <2 x double> %x0, double %7, i64 0
  %9 = extractelement <2 x double> %x0, i64 0
  %10 = extractelement <2 x double> %x1, i64 0
  %11 = extractelement <2 x double> %x2, i64 0
  %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11)
  %13 = bitcast i8 %x3 to <8 x i1>
  %14 = extractelement <8 x i1> %13, i64 0
  %15 = select i1 %14, double %12, double 0.000000e+00
  %16 = insertelement <2 x double> %x0, double %15, i64 0
  %res2 = fadd <2 x double> %8, %16
  ret <2 x double> %res2
}
10660
10661declare float @llvm.fma.f32(float, float, float) #1
10662declare float @llvm.x86.avx512.vfmadd.f32(float, float, float, i32) #0
10663
; Zero-masked scalar float FMA (vfmadd.ss) — float twin of
; @test_int_x86_avx512_maskz_vfmadd_sd above.
; NOTE(review): no attribute group on the define, so no parameter-shadow TLS
; loads are expected; all shadows are constant zero and the retval shadow is
; stored as zeroinitializer.
define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss(
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 0, i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast float [[TMP4]] to i32
; CHECK-NEXT:    [[TMP9:%.*]] = xor i32 [[TMP8]], 0
; CHECK-NEXT:    [[TMP10:%.*]] = or i32 [[TMP9]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[TMP10]], 0
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 false, i32 [[TMP11]], i32 [[TMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00
; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x float> [[X0]], float [[TMP12]], i64 0
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[X0]], i64 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[X1]], i64 0
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x float> [[X2]], i64 0
; CHECK-NEXT:    [[TMP17:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]], i32 11)
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 0, i32 0
; CHECK-NEXT:    [[TMP21:%.*]] = bitcast float [[TMP17]] to i32
; CHECK-NEXT:    [[TMP22:%.*]] = xor i32 [[TMP21]], 0
; CHECK-NEXT:    [[TMP23:%.*]] = or i32 [[TMP22]], 0
; CHECK-NEXT:    [[TMP24:%.*]] = or i32 [[TMP23]], 0
; CHECK-NEXT:    [[_MSPROP_SELECT1:%.*]] = select i1 false, i32 [[TMP24]], i32 [[TMP20]]
; CHECK-NEXT:    [[TMP25:%.*]] = select i1 [[TMP19]], float [[TMP17]], float 0.000000e+00
; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x float> [[X0]], float [[TMP25]], i64 0
; CHECK-NEXT:    [[RES2:%.*]] = fadd <4 x float> [[TMP13]], [[TMP26]]
; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES2]]
;
  %1 = extractelement <4 x float> %x0, i64 0
  %2 = extractelement <4 x float> %x1, i64 0
  %3 = extractelement <4 x float> %x2, i64 0
  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, float %4, float 0.000000e+00
  %8 = insertelement <4 x float> %x0, float %7, i64 0
  %9 = extractelement <4 x float> %x0, i64 0
  %10 = extractelement <4 x float> %x1, i64 0
  %11 = extractelement <4 x float> %x2, i64 0
  %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11)
  %13 = bitcast i8 %x3 to <8 x i1>
  %14 = extractelement <8 x i1> %13, i64 0
  %15 = select i1 %14, float %12, float 0.000000e+00
  %16 = insertelement <4 x float> %x0, float %15, i64 0
  %res2 = fadd <4 x float> %8, %16
  ret <4 x float> %res2
}
10718
; Zero-masked scalar FMA whose multiplicand comes from memory. The expected
; MSan output shows: (1) a check of the pointer argument's shadow with a
; branch to __msan_warning_noreturn when it is non-zero, and (2) the shadow
; of the loaded vector fetched from the shadow address computed by XOR-ing
; the data pointer with 87960930222080 (the x86_64 Linux shadow offset).
define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss_load0(i8 zeroext %0, ptr nocapture readonly %1, float %2, float %3) #0 {
; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss_load0(
; CHECK-NEXT:    [[TMP5:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT:    [[TMP8:%.*]] = load i8, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
; CHECK:       9:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       10:
; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x float>, ptr [[TMP1:%.*]], align 16
; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080
; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i64 0
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or i32 [[_MSPROP]], [[TMP6]]
; CHECK-NEXT:    [[_MSPROP2:%.*]] = or i32 [[_MSPROP1]], [[TMP7]]
; CHECK-NEXT:    [[TMP16:%.*]] = tail call float @llvm.fma.f32(float [[TMP15]], float [[TMP2:%.*]], float [[TMP3:%.*]])
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i8 [[TMP8]] to <8 x i1>
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8 [[TMP0:%.*]] to <8 x i1>
; CHECK-NEXT:    [[_MSPROP3:%.*]] = extractelement <8 x i1> [[TMP17]], i64 0
; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[_MSPROP2]], i32 0
; CHECK-NEXT:    [[TMP21:%.*]] = bitcast float [[TMP16]] to i32
; CHECK-NEXT:    [[TMP22:%.*]] = xor i32 [[TMP21]], 0
; CHECK-NEXT:    [[TMP23:%.*]] = or i32 [[TMP22]], [[_MSPROP2]]
; CHECK-NEXT:    [[TMP24:%.*]] = or i32 [[TMP23]], 0
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP3]], i32 [[TMP24]], i32 [[TMP20]]
; CHECK-NEXT:    [[TMP25:%.*]] = select i1 [[TMP19]], float [[TMP16]], float 0.000000e+00
; CHECK-NEXT:    [[_MSPROP4:%.*]] = insertelement <4 x i32> [[_MSLD]], i32 [[_MSPROP_SELECT]], i64 0
; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP25]], i64 0
; CHECK-NEXT:    store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[TMP26]]
;
  %5 = load <4 x float>, ptr %1, align 16
  %6 = extractelement <4 x float> %5, i64 0
  %7 = tail call float @llvm.fma.f32(float %6, float %2, float %3) #2
  %8 = bitcast i8 %0 to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %7, float 0.000000e+00
  %11 = insertelement <4 x float> %5, float %10, i64 0
  ret <4 x float> %11
}
10767
; mask3 variant of the merge-masked scalar double FMA: the masked-off result
; lane falls back to (and is inserted into) the third operand %x2 instead of
; %x0. Same three-variant structure as the mask_vfmadd tests: exact shadow
; propagation for llvm.fma.f64, shadow-check-and-warn
; (__msan_warning_noreturn) for the explicit-rounding
; llvm.x86.avx512.vfmadd.f64 calls (i32 11, i32 10).
define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_sd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
; CHECK-NEXT:    [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
; CHECK-NEXT:    [[TMP8:%.*]] = call double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP7]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
; CHECK-NEXT:    [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[_MSPROP4]], i64 [[_MSPROP2]]
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast double [[TMP8]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double [[TMP7]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = or i64 [[TMP15]], [[_MSPROP4]]
; CHECK-NEXT:    [[TMP17:%.*]] = or i64 [[TMP16]], [[_MSPROP2]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i64 [[TMP17]], i64 [[TMP12]]
; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP11]], double [[TMP8]], double [[TMP7]]
; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[_MSPROP_SELECT]], i64 0
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x double> [[X2]], double [[TMP18]], i64 0
; CHECK-NEXT:    [[_MSPROP7:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[X0]], i64 0
; CHECK-NEXT:    [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x double> [[X1]], i64 0
; CHECK-NEXT:    [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x double> [[X2]], i64 0
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP7]], 0
; CHECK-NEXT:    [[_MSCMP19:%.*]] = icmp ne i64 [[_MSPROP8]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
; CHECK-NEXT:    [[_MSCMP20:%.*]] = icmp ne i64 [[_MSPROP9]], 0
; CHECK-NEXT:    [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
; CHECK-NEXT:    br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
; CHECK:       23:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       24:
; CHECK-NEXT:    [[TMP25:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP20]], double [[TMP21]], double [[TMP22]], i32 11)
; CHECK-NEXT:    [[_MSPROP10:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i64 0
; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <2 x double> [[X2]], double [[TMP25]], i64 0
; CHECK-NEXT:    [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <2 x double> [[X0]], i64 0
; CHECK-NEXT:    [[_MSPROP12:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <2 x double> [[X1]], i64 0
; CHECK-NEXT:    [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0
; CHECK-NEXT:    [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP11]], 0
; CHECK-NEXT:    [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP12]], 0
; CHECK-NEXT:    [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
; CHECK-NEXT:    [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
; CHECK-NEXT:    [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
; CHECK-NEXT:    br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
; CHECK:       30:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       31:
; CHECK-NEXT:    [[TMP32:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP27]], double [[TMP28]], double [[TMP29]], i32 10)
; CHECK-NEXT:    [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT:    [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT:    [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], i64 0, i64 [[_MSPROP13]]
; CHECK-NEXT:    [[TMP37:%.*]] = bitcast double [[TMP32]] to i64
; CHECK-NEXT:    [[TMP38:%.*]] = bitcast double [[TMP29]] to i64
; CHECK-NEXT:    [[TMP39:%.*]] = xor i64 [[TMP37]], [[TMP38]]
; CHECK-NEXT:    [[TMP40:%.*]] = or i64 [[TMP39]], 0
; CHECK-NEXT:    [[TMP41:%.*]] = or i64 [[TMP40]], [[_MSPROP13]]
; CHECK-NEXT:    [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i64 [[TMP41]], i64 [[TMP36]]
; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP35]], double [[TMP32]], double [[TMP29]]
; CHECK-NEXT:    [[_MSPROP16:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[_MSPROP_SELECT15]], i64 0
; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <2 x double> [[X2]], double [[TMP42]], i64 0
; CHECK-NEXT:    [[_MSPROP17:%.*]] = or <2 x i64> [[_MSPROP6]], [[_MSPROP10]]
; CHECK-NEXT:    [[RES3:%.*]] = fadd <2 x double> [[TMP19]], [[TMP26]]
; CHECK-NEXT:    [[_MSPROP18:%.*]] = or <2 x i64> [[_MSPROP16]], [[_MSPROP17]]
; CHECK-NEXT:    [[RES4:%.*]] = fadd <2 x double> [[TMP43]], [[RES3]]
; CHECK-NEXT:    store <2 x i64> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x double> [[RES4]]
;
  ; Variant 1: default-rounding fma, masked fallback lane comes from %x2.
  %1 = extractelement <2 x double> %x0, i64 0
  %2 = extractelement <2 x double> %x1, i64 0
  %3 = extractelement <2 x double> %x2, i64 0
  %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, double %4, double %3
  %8 = insertelement <2 x double> %x2, double %7, i64 0
  ; Variant 2: explicit rounding (i32 11), unmasked, inserted into %x2.
  %9 = extractelement <2 x double> %x0, i64 0
  %10 = extractelement <2 x double> %x1, i64 0
  %11 = extractelement <2 x double> %x2, i64 0
  %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11)
  %13 = insertelement <2 x double> %x2, double %12, i64 0
  ; Variant 3: explicit rounding (i32 10), merge-masked into %x2.
  %14 = extractelement <2 x double> %x0, i64 0
  %15 = extractelement <2 x double> %x1, i64 0
  %16 = extractelement <2 x double> %x2, i64 0
  %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 10)
  %18 = bitcast i8 %x3 to <8 x i1>
  %19 = extractelement <8 x i1> %18, i64 0
  %20 = select i1 %19, double %17, double %16
  %21 = insertelement <2 x double> %x2, double %20, i64 0
  %res3 = fadd <2 x double> %8, %13
  %res4 = fadd <2 x double> %21, %res3
  ret <2 x double> %res4
}
10880
; Verifies MSan shadow propagation for scalar single-precision FMA in the
; "mask3" form: one plain @llvm.fma.f32 plus two @llvm.x86.avx512.vfmadd.f32
; calls carrying explicit rounding operands (i32 11 and i32 10). Lane 0 of
; each result is blended against lane 0 of %x2 using bit 0 of the i8 mask %x3.
; The CHECK block below was autogenerated by update_test_checks.py and asserts
; the shadow loads from @__msan_param_tls, the per-lane shadow OR/select
; chains, and the __msan_warning_noreturn guards on the rounding-mode calls.
10881define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
10882; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_ss(
10883; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
10884; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
10885; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
10886; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
10887; CHECK-NEXT:    call void @llvm.donothing()
10888; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
10889; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
10890; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
10891; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
10892; CHECK-NEXT:    [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
10893; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
10894; CHECK-NEXT:    [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
10895; CHECK-NEXT:    [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
10896; CHECK-NEXT:    [[TMP8:%.*]] = call float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
10897; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
10898; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
10899; CHECK-NEXT:    [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
10900; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
10901; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[_MSPROP4]], i32 [[_MSPROP2]]
10902; CHECK-NEXT:    [[TMP13:%.*]] = bitcast float [[TMP8]] to i32
10903; CHECK-NEXT:    [[TMP14:%.*]] = bitcast float [[TMP7]] to i32
10904; CHECK-NEXT:    [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
10905; CHECK-NEXT:    [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP4]]
10906; CHECK-NEXT:    [[TMP17:%.*]] = or i32 [[TMP16]], [[_MSPROP2]]
10907; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i32 [[TMP17]], i32 [[TMP12]]
10908; CHECK-NEXT:    [[TMP18:%.*]] = select i1 [[TMP11]], float [[TMP8]], float [[TMP7]]
10909; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT]], i64 0
10910; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x float> [[X2]], float [[TMP18]], i64 0
10911; CHECK-NEXT:    [[_MSPROP7:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
10912; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x float> [[X0]], i64 0
10913; CHECK-NEXT:    [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
10914; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x float> [[X1]], i64 0
10915; CHECK-NEXT:    [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
10916; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x float> [[X2]], i64 0
10917; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP7]], 0
10918; CHECK-NEXT:    [[_MSCMP19:%.*]] = icmp ne i32 [[_MSPROP8]], 0
10919; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
10920; CHECK-NEXT:    [[_MSCMP20:%.*]] = icmp ne i32 [[_MSPROP9]], 0
10921; CHECK-NEXT:    [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
10922; CHECK-NEXT:    br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
10923; CHECK:       23:
10924; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10925; CHECK-NEXT:    unreachable
10926; CHECK:       24:
10927; CHECK-NEXT:    [[TMP25:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP20]], float [[TMP21]], float [[TMP22]], i32 11)
10928; CHECK-NEXT:    [[_MSPROP10:%.*]] = insertelement <4 x i32> [[TMP3]], i32 0, i64 0
10929; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x float> [[X2]], float [[TMP25]], i64 0
10930; CHECK-NEXT:    [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
10931; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x float> [[X0]], i64 0
10932; CHECK-NEXT:    [[_MSPROP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
10933; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x float> [[X1]], i64 0
10934; CHECK-NEXT:    [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
10935; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0
10936; CHECK-NEXT:    [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP11]], 0
10937; CHECK-NEXT:    [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP12]], 0
10938; CHECK-NEXT:    [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
10939; CHECK-NEXT:    [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
10940; CHECK-NEXT:    [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
10941; CHECK-NEXT:    br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
10942; CHECK:       30:
10943; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
10944; CHECK-NEXT:    unreachable
10945; CHECK:       31:
10946; CHECK-NEXT:    [[TMP32:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP27]], float [[TMP28]], float [[TMP29]], i32 10)
10947; CHECK-NEXT:    [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
10948; CHECK-NEXT:    [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
10949; CHECK-NEXT:    [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
10950; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
10951; CHECK-NEXT:    [[TMP36:%.*]] = select i1 [[TMP35]], i32 0, i32 [[_MSPROP13]]
10952; CHECK-NEXT:    [[TMP37:%.*]] = bitcast float [[TMP32]] to i32
10953; CHECK-NEXT:    [[TMP38:%.*]] = bitcast float [[TMP29]] to i32
10954; CHECK-NEXT:    [[TMP39:%.*]] = xor i32 [[TMP37]], [[TMP38]]
10955; CHECK-NEXT:    [[TMP40:%.*]] = or i32 [[TMP39]], 0
10956; CHECK-NEXT:    [[TMP41:%.*]] = or i32 [[TMP40]], [[_MSPROP13]]
10957; CHECK-NEXT:    [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP41]], i32 [[TMP36]]
10958; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP35]], float [[TMP32]], float [[TMP29]]
10959; CHECK-NEXT:    [[_MSPROP16:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT15]], i64 0
10960; CHECK-NEXT:    [[TMP43:%.*]] = insertelement <4 x float> [[X2]], float [[TMP42]], i64 0
10961; CHECK-NEXT:    [[_MSPROP17:%.*]] = or <4 x i32> [[_MSPROP6]], [[_MSPROP10]]
10962; CHECK-NEXT:    [[RES3:%.*]] = fadd <4 x float> [[TMP19]], [[TMP26]]
10963; CHECK-NEXT:    [[_MSPROP18:%.*]] = or <4 x i32> [[_MSPROP16]], [[_MSPROP17]]
10964; CHECK-NEXT:    [[RES4:%.*]] = fadd <4 x float> [[TMP43]], [[RES3]]
10965; CHECK-NEXT:    store <4 x i32> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
10966; CHECK-NEXT:    ret <4 x float> [[RES4]]
10967;
; Case 1: plain IEEE FMA on lane 0; keep the result only when bit 0 of %x3
; is set, otherwise keep the original %x2 element.
10968  %1 = extractelement <4 x float> %x0, i64 0
10969  %2 = extractelement <4 x float> %x1, i64 0
10970  %3 = extractelement <4 x float> %x2, i64 0
10971  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
10972  %5 = bitcast i8 %x3 to <8 x i1>
10973  %6 = extractelement <8 x i1> %5, i64 0
10974  %7 = select i1 %6, float %4, float %3
10975  %8 = insertelement <4 x float> %x2, float %7, i64 0
; Case 2: same FMA through the AVX-512 intrinsic with rounding operand 11,
; result inserted unconditionally (no mask select).
10976  %9 = extractelement <4 x float> %x0, i64 0
10977  %10 = extractelement <4 x float> %x1, i64 0
10978  %11 = extractelement <4 x float> %x2, i64 0
10979  %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11)
10980  %13 = insertelement <4 x float> %x2, float %12, i64 0
; Case 3: rounding operand 10 with the same bit-0 mask select as case 1.
10981  %14 = extractelement <4 x float> %x0, i64 0
10982  %15 = extractelement <4 x float> %x1, i64 0
10983  %16 = extractelement <4 x float> %x2, i64 0
10984  %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 10)
10985  %18 = bitcast i8 %x3 to <8 x i1>
10986  %19 = extractelement <8 x i1> %18, i64 0
10987  %20 = select i1 %19, float %17, float %16
10988  %21 = insertelement <4 x float> %x2, float %20, i64 0
; Sum all three variants so each path contributes to the returned value
; (and to the return shadow at @__msan_retval_tls).
10989  %res3 = fadd <4 x float> %8, %13
10990  %res4 = fadd <4 x float> %21, %res3
10991  ret <4 x float> %res4
10992}
10993
; Verifies MSan handling of a merge-masked scalar float FMA whose lane-0
; operands are folded from memory: the shadow of the loaded float is fetched
; from the shadow mapping (pointer XOR 87960930222080), propagated through
; the FMA/select chain, and the blended lane-0 shadow is stored back next to
; the result at %a. CHECK lines autogenerated by update_test_checks.py.
10994define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c, <4 x float> %extra_param, <4 x float> %extra_param2) #0 {
10995; CHECK-LABEL: @fmadd_ss_mask_memfold(
10996; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
10997; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
10998; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
10999; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
11000; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
11001; CHECK-NEXT:    call void @llvm.donothing()
11002; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
11003; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
11004; CHECK:       6:
11005; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11006; CHECK-NEXT:    unreachable
11007; CHECK:       7:
11008; CHECK-NEXT:    [[A_VAL:%.*]] = load float, ptr [[A:%.*]], align 4
11009; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
11010; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
11011; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
11012; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
11013; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[_MSLD]], i32 0
11014; CHECK-NEXT:    [[AV0:%.*]] = insertelement <4 x float> [[EXTRA_PARAM:%.*]], float [[A_VAL]], i32 0
11015; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
11016; CHECK-NEXT:    [[AV1:%.*]] = insertelement <4 x float> [[AV0]], float 0.000000e+00, i32 1
11017; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
11018; CHECK-NEXT:    [[AV2:%.*]] = insertelement <4 x float> [[AV1]], float 0.000000e+00, i32 2
11019; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
11020; CHECK-NEXT:    [[AV:%.*]] = insertelement <4 x float> [[AV2]], float 0.000000e+00, i32 3
11021; CHECK-NEXT:    [[_MSCMP17:%.*]] = icmp ne i64 [[TMP2]], 0
11022; CHECK-NEXT:    br i1 [[_MSCMP17]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]]
11023; CHECK:       11:
11024; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11025; CHECK-NEXT:    unreachable
11026; CHECK:       12:
11027; CHECK-NEXT:    [[B_VAL:%.*]] = load float, ptr [[B:%.*]], align 4
11028; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
11029; CHECK-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
11030; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
11031; CHECK-NEXT:    [[_MSLD4:%.*]] = load i32, ptr [[TMP13]], align 4
11032; CHECK-NEXT:    [[_MSPROP5:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[_MSLD4]], i32 0
11033; CHECK-NEXT:    [[BV0:%.*]] = insertelement <4 x float> [[EXTRA_PARAM2:%.*]], float [[B_VAL]], i32 0
11034; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <4 x i32> [[_MSPROP5]], i32 0, i32 1
11035; CHECK-NEXT:    [[BV1:%.*]] = insertelement <4 x float> [[BV0]], float 0.000000e+00, i32 1
11036; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <4 x i32> [[_MSPROP6]], i32 0, i32 2
11037; CHECK-NEXT:    [[BV2:%.*]] = insertelement <4 x float> [[BV1]], float 0.000000e+00, i32 2
11038; CHECK-NEXT:    [[_MSPROP8:%.*]] = insertelement <4 x i32> [[_MSPROP7]], i32 0, i32 3
11039; CHECK-NEXT:    [[BV:%.*]] = insertelement <4 x float> [[BV2]], float 0.000000e+00, i32 3
11040; CHECK-NEXT:    [[_MSPROP9:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
11041; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[AV]], i64 0
11042; CHECK-NEXT:    [[_MSPROP10:%.*]] = extractelement <4 x i32> [[_MSPROP8]], i64 0
11043; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[BV]], i64 0
11044; CHECK-NEXT:    [[_MSPROP11:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
11045; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x float> [[AV]], i64 0
11046; CHECK-NEXT:    [[_MSPROP12:%.*]] = or i32 [[_MSPROP9]], [[_MSPROP10]]
11047; CHECK-NEXT:    [[_MSPROP13:%.*]] = or i32 [[_MSPROP12]], [[_MSPROP11]]
11048; CHECK-NEXT:    [[TMP17:%.*]] = call float @llvm.fma.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]])
11049; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
11050; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
11051; CHECK-NEXT:    [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
11052; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
11053; CHECK-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[_MSPROP13]], i32 [[_MSPROP9]]
11054; CHECK-NEXT:    [[TMP22:%.*]] = bitcast float [[TMP17]] to i32
11055; CHECK-NEXT:    [[TMP23:%.*]] = bitcast float [[TMP14]] to i32
11056; CHECK-NEXT:    [[TMP24:%.*]] = xor i32 [[TMP22]], [[TMP23]]
11057; CHECK-NEXT:    [[TMP25:%.*]] = or i32 [[TMP24]], [[_MSPROP13]]
11058; CHECK-NEXT:    [[TMP26:%.*]] = or i32 [[TMP25]], [[_MSPROP9]]
11059; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP26]], i32 [[TMP21]]
11060; CHECK-NEXT:    [[TMP27:%.*]] = select i1 [[TMP20]], float [[TMP17]], float [[TMP14]]
11061; CHECK-NEXT:    [[_MSPROP15:%.*]] = insertelement <4 x i32> [[_MSPROP3]], i32 [[_MSPROP_SELECT]], i64 0
11062; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <4 x float> [[AV]], float [[TMP27]], i64 0
11063; CHECK-NEXT:    [[_MSPROP16:%.*]] = extractelement <4 x i32> [[_MSPROP15]], i32 0
11064; CHECK-NEXT:    [[SR:%.*]] = extractelement <4 x float> [[TMP28]], i32 0
11065; CHECK-NEXT:    [[_MSCMP18:%.*]] = icmp ne i64 [[TMP1]], 0
11066; CHECK-NEXT:    br i1 [[_MSCMP18]], label [[TMP34:%.*]], label [[TMP35:%.*]], !prof [[PROF1]]
11067; CHECK:       31:
11068; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11069; CHECK-NEXT:    unreachable
11070; CHECK:       32:
11071; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[A]] to i64
11072; CHECK-NEXT:    [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080
11073; CHECK-NEXT:    [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr
11074; CHECK-NEXT:    store i32 [[_MSPROP16]], ptr [[TMP33]], align 4
11075; CHECK-NEXT:    store float [[SR]], ptr [[A]], align 4
11076; CHECK-NEXT:    ret void
11077;
; Build <4 x float> vectors whose lane 0 comes from memory, remaining lanes
; zeroed.
11078  %a.val = load float, ptr %a
11079  %av0 = insertelement <4 x float> %extra_param, float %a.val, i32 0
11080  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
11081  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
11082  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
11083
11084  %b.val = load float, ptr %b
11085  %bv0 = insertelement <4 x float> %extra_param2, float %b.val, i32 0
11086  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
11087  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
11088  %bv =  insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
; Lane-0 FMA; merge-mask on bit 0 of %c (keep %1 when the bit is clear),
; then write the blended lane 0 back to %a.
11089  %1 = extractelement <4 x float> %av, i64 0
11090  %2 = extractelement <4 x float> %bv, i64 0
11091  %3 = extractelement <4 x float> %av, i64 0
11092  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
11093  %5 = bitcast i8 %c to <8 x i1>
11094  %6 = extractelement <8 x i1> %5, i64 0
11095  %7 = select i1 %6, float %4, float %1
11096  %8 = insertelement <4 x float> %av, float %7, i64 0
11097  %sr = extractelement <4 x float> %8, i32 0
11098  store float %sr, ptr %a
11099  ret void
11100}
11101
; Zero-masking variant of fmadd_ss_mask_memfold: lane 0 selects between the
; FMA result and the constant 0.0 (not the original element) on bit 0 of %c.
; The CHECK lines show the corresponding shadow select falls back to a clean
; (zero) shadow on the masked-off path. Autogenerated by update_test_checks.py.
11102define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c, <4 x float> %extra_param, <4 x float> %extra_param2) #0 {
11103; CHECK-LABEL: @fmadd_ss_maskz_memfold(
11104; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
11105; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
11106; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
11107; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
11108; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
11109; CHECK-NEXT:    call void @llvm.donothing()
11110; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
11111; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
11112; CHECK:       6:
11113; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11114; CHECK-NEXT:    unreachable
11115; CHECK:       7:
11116; CHECK-NEXT:    [[A_VAL:%.*]] = load float, ptr [[A:%.*]], align 4
11117; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
11118; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
11119; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
11120; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
11121; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[_MSLD]], i32 0
11122; CHECK-NEXT:    [[AV0:%.*]] = insertelement <4 x float> [[EXTRA_PARAM:%.*]], float [[A_VAL]], i32 0
11123; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
11124; CHECK-NEXT:    [[AV1:%.*]] = insertelement <4 x float> [[AV0]], float 0.000000e+00, i32 1
11125; CHECK-NEXT:    [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
11126; CHECK-NEXT:    [[AV2:%.*]] = insertelement <4 x float> [[AV1]], float 0.000000e+00, i32 2
11127; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
11128; CHECK-NEXT:    [[AV:%.*]] = insertelement <4 x float> [[AV2]], float 0.000000e+00, i32 3
11129; CHECK-NEXT:    [[_MSCMP17:%.*]] = icmp ne i64 [[TMP2]], 0
11130; CHECK-NEXT:    br i1 [[_MSCMP17]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
11131; CHECK:       11:
11132; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11133; CHECK-NEXT:    unreachable
11134; CHECK:       12:
11135; CHECK-NEXT:    [[B_VAL:%.*]] = load float, ptr [[B:%.*]], align 4
11136; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
11137; CHECK-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
11138; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
11139; CHECK-NEXT:    [[_MSLD2:%.*]] = load i32, ptr [[TMP13]], align 4
11140; CHECK-NEXT:    [[_MSPROP5:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[_MSLD4]], i32 0
11141; CHECK-NEXT:    [[BV0:%.*]] = insertelement <4 x float> [[EXTRA_PARAM2:%.*]], float [[B_VAL]], i32 0
11142; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <4 x i32> [[_MSPROP5]], i32 0, i32 1
11143; CHECK-NEXT:    [[BV1:%.*]] = insertelement <4 x float> [[BV0]], float 0.000000e+00, i32 1
11144; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <4 x i32> [[_MSPROP6]], i32 0, i32 2
11145; CHECK-NEXT:    [[BV2:%.*]] = insertelement <4 x float> [[BV1]], float 0.000000e+00, i32 2
11146; CHECK-NEXT:    [[_MSPROP8:%.*]] = insertelement <4 x i32> [[_MSPROP7]], i32 0, i32 3
11147; CHECK-NEXT:    [[BV:%.*]] = insertelement <4 x float> [[BV2]], float 0.000000e+00, i32 3
11148; CHECK-NEXT:    [[_MSPROP9:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
11149; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x float> [[AV]], i64 0
11150; CHECK-NEXT:    [[_MSPROP10:%.*]] = extractelement <4 x i32> [[_MSPROP8]], i64 0
11151; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float> [[BV]], i64 0
11152; CHECK-NEXT:    [[_MSPROP11:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
11153; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x float> [[AV]], i64 0
11154; CHECK-NEXT:    [[_MSPROP12:%.*]] = or i32 [[_MSPROP9]], [[_MSPROP10]]
11155; CHECK-NEXT:    [[_MSPROP13:%.*]] = or i32 [[_MSPROP12]], [[_MSPROP11]]
11156; CHECK-NEXT:    [[TMP17:%.*]] = call float @llvm.fma.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]])
11157; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
11158; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
11159; CHECK-NEXT:    [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
11160; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
11161; CHECK-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[_MSPROP13]], i32 0
11162; CHECK-NEXT:    [[TMP22:%.*]] = bitcast float [[TMP17]] to i32
11163; CHECK-NEXT:    [[TMP23:%.*]] = xor i32 [[TMP22]], 0
11164; CHECK-NEXT:    [[TMP24:%.*]] = or i32 [[TMP23]], [[_MSPROP13]]
11165; CHECK-NEXT:    [[TMP25:%.*]] = or i32 [[TMP24]], 0
11166; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP25]], i32 [[TMP21]]
11167; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP20]], float [[TMP17]], float 0.000000e+00
11168; CHECK-NEXT:    [[_MSPROP15:%.*]] = insertelement <4 x i32> [[_MSPROP3]], i32 [[_MSPROP_SELECT]], i64 0
11169; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> [[AV]], float [[TMP26]], i64 0
11170; CHECK-NEXT:    [[_MSPROP16:%.*]] = extractelement <4 x i32> [[_MSPROP15]], i32 0
11171; CHECK-NEXT:    [[SR:%.*]] = extractelement <4 x float> [[TMP27]], i32 0
11172; CHECK-NEXT:    [[_MSCMP18:%.*]] = icmp ne i64 [[TMP1]], 0
11173; CHECK-NEXT:    br i1 [[_MSCMP18]], label [[TMP33:%.*]], label [[TMP34:%.*]], !prof [[PROF1]]
11174; CHECK:       30:
11175; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11176; CHECK-NEXT:    unreachable
11177; CHECK:       31:
11178; CHECK-NEXT:    [[TMP30:%.*]] = ptrtoint ptr [[A]] to i64
11179; CHECK-NEXT:    [[TMP31:%.*]] = xor i64 [[TMP30]], 87960930222080
11180; CHECK-NEXT:    [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr
11181; CHECK-NEXT:    store i32 [[_MSPROP16]], ptr [[TMP32]], align 4
11182; CHECK-NEXT:    store float [[SR]], ptr [[A]], align 4
11183; CHECK-NEXT:    ret void
11184;
; Build <4 x float> vectors whose lane 0 comes from memory, remaining lanes
; zeroed.
11185  %a.val = load float, ptr %a
11186  %av0 = insertelement <4 x float> %extra_param, float %a.val, i32 0
11187  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
11188  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
11189  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
11190
11191  %b.val = load float, ptr %b
11192  %bv0 = insertelement <4 x float> %extra_param2, float %b.val, i32 0
11193  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
11194  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
11195  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
; Lane-0 FMA; zero-mask on bit 0 of %c (result becomes 0.0 when the bit is
; clear), then write the blended lane 0 back to %a.
11196  %1 = extractelement <4 x float> %av, i64 0
11197  %2 = extractelement <4 x float> %bv, i64 0
11198  %3 = extractelement <4 x float> %av, i64 0
11199  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
11200  %5 = bitcast i8 %c to <8 x i1>
11201  %6 = extractelement <8 x i1> %5, i64 0
11202  %7 = select i1 %6, float %4, float 0.000000e+00
11203  %8 = insertelement <4 x float> %av, float %7, i64 0
11204  %sr = extractelement <4 x float> %8, i32 0
11205  store float %sr, ptr %a
11206  ret void
11207}
11208
; Double-precision counterpart of fmadd_ss_mask_memfold: merge-masked scalar
; f64 FMA with the lane-0 operand folded from memory. Shadow for the loaded
; double is an i64 fetched from the shadow mapping (pointer XOR
; 87960930222080) and the blended lane-0 shadow is stored back alongside the
; result at %a. CHECK lines autogenerated by update_test_checks.py.
11209define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c, <2 x double> %extra_param, <2 x double> %extra_param2) #0 {
11210; CHECK-LABEL: @fmadd_sd_mask_memfold(
11211; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
11212; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
11213; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
11214; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
11215; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
11216; CHECK-NEXT:    call void @llvm.donothing()
11217; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
11218; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
11219; CHECK:       6:
11220; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11221; CHECK-NEXT:    unreachable
11222; CHECK:       7:
11223; CHECK-NEXT:    [[A_VAL:%.*]] = load double, ptr [[A:%.*]], align 8
11224; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
11225; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
11226; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
11227; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
11228; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[_MSLD]], i32 0
11229; CHECK-NEXT:    [[AV0:%.*]] = insertelement <2 x double> [[EXTRA_PARAM:%.*]], double [[A_VAL]], i32 0
11230; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
11231; CHECK-NEXT:    [[AV:%.*]] = insertelement <2 x double> [[AV0]], double 0.000000e+00, i32 1
11232; CHECK-NEXT:    [[_MSCMP13:%.*]] = icmp ne i64 [[TMP2]], 0
11233; CHECK-NEXT:    br i1 [[_MSCMP13]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]]
11234; CHECK:       11:
11235; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11236; CHECK-NEXT:    unreachable
11237; CHECK:       12:
11238; CHECK-NEXT:    [[B_VAL:%.*]] = load double, ptr [[B:%.*]], align 8
11239; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
11240; CHECK-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
11241; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
11242; CHECK-NEXT:    [[_MSLD2:%.*]] = load i64, ptr [[TMP13]], align 8
11243; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[_MSLD2]], i32 0
11244; CHECK-NEXT:    [[BV0:%.*]] = insertelement <2 x double> [[EXTRA_PARAM2:%.*]], double [[B_VAL]], i32 0
11245; CHECK-NEXT:    [[_MSPROP4:%.*]] = insertelement <2 x i64> [[_MSPROP3]], i64 0, i32 1
11246; CHECK-NEXT:    [[BV:%.*]] = insertelement <2 x double> [[BV0]], double 0.000000e+00, i32 1
11247; CHECK-NEXT:    [[_MSPROP5:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
11248; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[AV]], i64 0
11249; CHECK-NEXT:    [[_MSPROP6:%.*]] = extractelement <2 x i64> [[_MSPROP4]], i64 0
11250; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x double> [[BV]], i64 0
11251; CHECK-NEXT:    [[_MSPROP7:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
11252; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[AV]], i64 0
11253; CHECK-NEXT:    [[_MSPROP8:%.*]] = or i64 [[_MSPROP5]], [[_MSPROP6]]
11254; CHECK-NEXT:    [[_MSPROP9:%.*]] = or i64 [[_MSPROP8]], [[_MSPROP7]]
11255; CHECK-NEXT:    [[TMP17:%.*]] = call double @llvm.fma.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]])
11256; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
11257; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
11258; CHECK-NEXT:    [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
11259; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
11260; CHECK-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i64 [[_MSPROP9]], i64 [[_MSPROP5]]
11261; CHECK-NEXT:    [[TMP22:%.*]] = bitcast double [[TMP17]] to i64
11262; CHECK-NEXT:    [[TMP23:%.*]] = bitcast double [[TMP14]] to i64
11263; CHECK-NEXT:    [[TMP24:%.*]] = xor i64 [[TMP22]], [[TMP23]]
11264; CHECK-NEXT:    [[TMP25:%.*]] = or i64 [[TMP24]], [[_MSPROP9]]
11265; CHECK-NEXT:    [[TMP26:%.*]] = or i64 [[TMP25]], [[_MSPROP5]]
11266; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP10]], i64 [[TMP26]], i64 [[TMP21]]
11267; CHECK-NEXT:    [[TMP27:%.*]] = select i1 [[TMP20]], double [[TMP17]], double [[TMP14]]
11268; CHECK-NEXT:    [[_MSPROP11:%.*]] = insertelement <2 x i64> [[_MSPROP1]], i64 [[_MSPROP_SELECT]], i64 0
11269; CHECK-NEXT:    [[TMP28:%.*]] = insertelement <2 x double> [[AV]], double [[TMP27]], i64 0
11270; CHECK-NEXT:    [[_MSPROP12:%.*]] = extractelement <2 x i64> [[_MSPROP11]], i32 0
11271; CHECK-NEXT:    [[SR:%.*]] = extractelement <2 x double> [[TMP28]], i32 0
11272; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i64 [[TMP1]], 0
11273; CHECK-NEXT:    br i1 [[_MSCMP14]], label [[TMP34:%.*]], label [[TMP35:%.*]], !prof [[PROF1]]
11274; CHECK:       31:
11275; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11276; CHECK-NEXT:    unreachable
11277; CHECK:       32:
11278; CHECK-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[A]] to i64
11279; CHECK-NEXT:    [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080
11280; CHECK-NEXT:    [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr
11281; CHECK-NEXT:    store i64 [[_MSPROP12]], ptr [[TMP33]], align 8
11282; CHECK-NEXT:    store double [[SR]], ptr [[A]], align 8
11283; CHECK-NEXT:    ret void
11284;
; Build <2 x double> vectors whose lane 0 comes from memory, lane 1 zeroed.
11285  %a.val = load double, ptr %a
11286  %av0 = insertelement <2 x double> %extra_param, double %a.val, i32 0
11287  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
11288
11289  %b.val = load double, ptr %b
11290  %bv0 = insertelement <2 x double> %extra_param2, double %b.val, i32 0
11291  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
; Lane-0 FMA; merge-mask on bit 0 of %c (keep %1 when the bit is clear),
; then write the blended lane 0 back to %a.
11292  %1 = extractelement <2 x double> %av, i64 0
11293  %2 = extractelement <2 x double> %bv, i64 0
11294  %3 = extractelement <2 x double> %av, i64 0
11295  %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
11296  %5 = bitcast i8 %c to <8 x i1>
11297  %6 = extractelement <8 x i1> %5, i64 0
11298  %7 = select i1 %6, double %4, double %1
11299  %8 = insertelement <2 x double> %av, double %7, i64 0
11300  %sr = extractelement <2 x double> %8, i32 0
11301  store double %sr, ptr %a
11302  ret void
11303}
11304
11305define void @fmadd_sd_maskz_memfold(ptr %a, ptr %b, i8 %c, <2x double> %extra_param, <2x double> %extra_param2) #0 {
; Zero-masking scalar FMA whose first/third operands are folded from memory.
; The CHECK lines (autogenerated by update_test_checks.py) verify that MSan:
;  - checks the shadow of the pointer arguments (icmp ne i64 [[TMP1]]/[[TMP2]], 0
;    branching to __msan_warning_noreturn) before each double load,
;  - loads the shadow of the loaded value from the shadow map (address xor
;    87960930222080) into [[_MSLD]]/[[_MSLD2]] and propagates it through the
;    insert/extract/fma/select chain, with the maskz lane's "false" shadow
;    being the constant 0 (select i1 [[TMP20]], i64 [[_MSPROP9]], i64 0),
;  - stores the resulting shadow [[_MSPROP12]] alongside the double stored to %a.
11306; CHECK-LABEL: @fmadd_sd_maskz_memfold(
11307; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
11308; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
11309; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
11310; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
11311; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
11312; CHECK-NEXT:    call void @llvm.donothing()
11313; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
11314; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
11315; CHECK:       6:
11316; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11317; CHECK-NEXT:    unreachable
11318; CHECK:       7:
11319; CHECK-NEXT:    [[A_VAL:%.*]] = load double, ptr [[A:%.*]], align 8
11320; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
11321; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
11322; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
11323; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
11324; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <2 x i64> [[TMP4]], i64 [[_MSLD]], i32 0
11325; CHECK-NEXT:    [[AV0:%.*]] = insertelement <2 x double> [[EXTRA_PARAM:%.*]], double [[A_VAL]], i32 0
11326; CHECK-NEXT:    [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
11327; CHECK-NEXT:    [[AV:%.*]] = insertelement <2 x double> [[AV0]], double 0.000000e+00, i32 1
11328; CHECK-NEXT:    [[_MSCMP13:%.*]] = icmp ne i64 [[TMP2]], 0
11329; CHECK-NEXT:    br i1 [[_MSCMP13]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
11330; CHECK:       11:
11331; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11332; CHECK-NEXT:    unreachable
11333; CHECK:       12:
11334; CHECK-NEXT:    [[B_VAL:%.*]] = load double, ptr [[B:%.*]], align 8
11335; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
11336; CHECK-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
11337; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
11338; CHECK-NEXT:    [[_MSLD2:%.*]] = load i64, ptr [[TMP13]], align 8
11339; CHECK-NEXT:    [[_MSPROP3:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[_MSLD2]], i32 0
11340; CHECK-NEXT:    [[BV0:%.*]] = insertelement <2 x double> [[EXTRA_PARAM2:%.*]], double [[B_VAL]], i32 0
11341; CHECK-NEXT:    [[_MSPROP4:%.*]] = insertelement <2 x i64> [[_MSPROP3]], i64 0, i32 1
11342; CHECK-NEXT:    [[BV:%.*]] = insertelement <2 x double> [[BV0]], double 0.000000e+00, i32 1
11343; CHECK-NEXT:    [[_MSPROP5:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
11344; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x double> [[AV]], i64 0
11345; CHECK-NEXT:    [[_MSPROP6:%.*]] = extractelement <2 x i64> [[_MSPROP4]], i64 0
11346; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x double> [[BV]], i64 0
11347; CHECK-NEXT:    [[_MSPROP7:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
11348; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x double> [[AV]], i64 0
11349; CHECK-NEXT:    [[_MSPROP8:%.*]] = or i64 [[_MSPROP5]], [[_MSPROP6]]
11350; CHECK-NEXT:    [[_MSPROP9:%.*]] = or i64 [[_MSPROP8]], [[_MSPROP7]]
11351; CHECK-NEXT:    [[TMP17:%.*]] = call double @llvm.fma.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]])
11352; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
11353; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
11354; CHECK-NEXT:    [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
11355; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
11356; CHECK-NEXT:    [[TMP21:%.*]] = select i1 [[TMP20]], i64 [[_MSPROP9]], i64 0
11357; CHECK-NEXT:    [[TMP22:%.*]] = bitcast double [[TMP17]] to i64
11358; CHECK-NEXT:    [[TMP23:%.*]] = xor i64 [[TMP22]], 0
11359; CHECK-NEXT:    [[TMP24:%.*]] = or i64 [[TMP23]], [[_MSPROP9]]
11360; CHECK-NEXT:    [[TMP25:%.*]] = or i64 [[TMP24]], 0
11361; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP10]], i64 [[TMP25]], i64 [[TMP21]]
11362; CHECK-NEXT:    [[TMP26:%.*]] = select i1 [[TMP20]], double [[TMP17]], double 0.000000e+00
11363; CHECK-NEXT:    [[_MSPROP11:%.*]] = insertelement <2 x i64> [[_MSPROP1]], i64 [[_MSPROP_SELECT]], i64 0
11364; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <2 x double> [[AV]], double [[TMP26]], i64 0
11365; CHECK-NEXT:    [[_MSPROP12:%.*]] = extractelement <2 x i64> [[_MSPROP11]], i32 0
11366; CHECK-NEXT:    [[SR:%.*]] = extractelement <2 x double> [[TMP27]], i32 0
11367; CHECK-NEXT:    [[_MSCMP14:%.*]] = icmp ne i64 [[TMP1]], 0
11368; CHECK-NEXT:    br i1 [[_MSCMP14]], label [[TMP33:%.*]], label [[TMP34:%.*]], !prof [[PROF1]]
11369; CHECK:       30:
11370; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11371; CHECK-NEXT:    unreachable
11372; CHECK:       31:
11373; CHECK-NEXT:    [[TMP30:%.*]] = ptrtoint ptr [[A]] to i64
11374; CHECK-NEXT:    [[TMP31:%.*]] = xor i64 [[TMP30]], 87960930222080
11375; CHECK-NEXT:    [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr
11376; CHECK-NEXT:    store i64 [[_MSPROP12]], ptr [[TMP32]], align 8
11377; CHECK-NEXT:    store double [[SR]], ptr [[A]], align 8
11378; CHECK-NEXT:    ret void
11379;
; Uninstrumented input IR: load the scalars, build <2 x double> vectors, do a
; scalar fma on lane 0, then zero-mask via bit 0 of %c and store lane 0 to %a.
11380  %a.val = load double, ptr %a
11381  %av0 = insertelement <2 x double> %extra_param, double %a.val, i32 0
11382  %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
11383
11384  %b.val = load double, ptr %b
11385  %bv0 = insertelement <2 x double> %extra_param2, double %b.val, i32 0
11386  %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
11387  %1 = extractelement <2 x double> %av, i64 0
11388  %2 = extractelement <2 x double> %bv, i64 0
11389  %3 = extractelement <2 x double> %av, i64 0
11390  %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
11391  %5 = bitcast i8 %c to <8 x i1>
11392  %6 = extractelement <8 x i1> %5, i64 0
; maskz: the unselected value is the constant 0.0, not a passthru operand
11393  %7 = select i1 %6, double %4, double 0.000000e+00
11394  %8 = insertelement <2 x double> %av, double %7, i64 0
11395  %sr = extractelement <2 x double> %8, i32 0
11396  store double %sr, ptr %a
11397  ret void
11398}
11399
11400define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
; mask3 scalar vfmsub (double): three fmsub variants expressed as fma with a
; negated %x2 addend — plain @llvm.fma.f64 (masked by bit 0 of %x3), and two
; @llvm.x86.avx512.vfmadd.f64 calls with rounding operands 11 and 10 — whose
; results are summed.  The CHECK lines verify MSan's two strategies:
;  - for @llvm.fma.f64, shadows of the three scalar operands are OR-ed together
;    ([[_MSPROP3]]/[[_MSPROP4]]) and propagated through the mask select,
;  - for the unsupported x86 intrinsic, MSan instead *checks* each operand
;    shadow and branches to __msan_warning_noreturn, leaving a clean (0 or
;    passthru) result shadow.
11401; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmsub_sd(
11402; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
11403; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
11404; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
11405; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
11406; CHECK-NEXT:    call void @llvm.donothing()
11407; CHECK-NEXT:    [[TMP5:%.*]] = fneg <2 x double> [[X2:%.*]]
11408; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
11409; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
11410; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
11411; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
11412; CHECK-NEXT:    [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
11413; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP5]], i64 0
11414; CHECK-NEXT:    [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
11415; CHECK-NEXT:    [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
11416; CHECK-NEXT:    [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP6]], double [[TMP7]], double [[TMP8]])
11417; CHECK-NEXT:    [[_MSPROP5:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
11418; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x double> [[X2]], i64 0
11419; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
11420; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
11421; CHECK-NEXT:    [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP11]], i64 0
11422; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
11423; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[_MSPROP4]], i64 [[_MSPROP5]]
11424; CHECK-NEXT:    [[TMP15:%.*]] = bitcast double [[TMP9]] to i64
11425; CHECK-NEXT:    [[TMP16:%.*]] = bitcast double [[TMP10]] to i64
11426; CHECK-NEXT:    [[TMP17:%.*]] = xor i64 [[TMP15]], [[TMP16]]
11427; CHECK-NEXT:    [[TMP18:%.*]] = or i64 [[TMP17]], [[_MSPROP4]]
11428; CHECK-NEXT:    [[TMP19:%.*]] = or i64 [[TMP18]], [[_MSPROP5]]
11429; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i64 [[TMP19]], i64 [[TMP14]]
11430; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP13]], double [[TMP9]], double [[TMP10]]
11431; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT]], i64 0
11432; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <2 x double> [[X2]], double [[TMP20]], i64 0
11433; CHECK-NEXT:    [[TMP22:%.*]] = fneg <2 x double> [[X2]]
11434; CHECK-NEXT:    [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
11435; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x double> [[X0]], i64 0
11436; CHECK-NEXT:    [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
11437; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x double> [[X1]], i64 0
11438; CHECK-NEXT:    [[_MSPROP10:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
11439; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <2 x double> [[TMP22]], i64 0
11440; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP8]], 0
11441; CHECK-NEXT:    [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP9]], 0
11442; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
11443; CHECK-NEXT:    [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP10]], 0
11444; CHECK-NEXT:    [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
11445; CHECK-NEXT:    br i1 [[_MSOR24]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]]
11446; CHECK:       26:
11447; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11448; CHECK-NEXT:    unreachable
11449; CHECK:       27:
11450; CHECK-NEXT:    [[TMP28:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP23]], double [[TMP24]], double [[TMP25]], i32 11)
11451; CHECK-NEXT:    [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
11452; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0
11453; CHECK-NEXT:    [[_MSPROP12:%.*]] = insertelement <2 x i64> [[TMP1]], i64 0, i64 0
11454; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <2 x double> [[X2]], double [[TMP28]], i64 0
11455; CHECK-NEXT:    [[TMP31:%.*]] = fneg <2 x double> [[X2]]
11456; CHECK-NEXT:    [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
11457; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <2 x double> [[X0]], i64 0
11458; CHECK-NEXT:    [[_MSPROP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
11459; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <2 x double> [[X1]], i64 0
11460; CHECK-NEXT:    [[_MSPROP15:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
11461; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <2 x double> [[TMP31]], i64 0
11462; CHECK-NEXT:    [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
11463; CHECK-NEXT:    [[_MSCMP26:%.*]] = icmp ne i64 [[_MSPROP14]], 0
11464; CHECK-NEXT:    [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
11465; CHECK-NEXT:    [[_MSCMP28:%.*]] = icmp ne i64 [[_MSPROP15]], 0
11466; CHECK-NEXT:    [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
11467; CHECK-NEXT:    br i1 [[_MSOR29]], label [[TMP35:%.*]], label [[TMP36:%.*]], !prof [[PROF1]]
11468; CHECK:       35:
11469; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11470; CHECK-NEXT:    unreachable
11471; CHECK:       36:
11472; CHECK-NEXT:    [[TMP37:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP32]], double [[TMP33]], double [[TMP34]], i32 10)
11473; CHECK-NEXT:    [[_MSPROP16:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
11474; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <2 x double> [[X2]], i64 0
11475; CHECK-NEXT:    [[TMP39:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
11476; CHECK-NEXT:    [[TMP40:%.*]] = bitcast i8 [[X3]] to <8 x i1>
11477; CHECK-NEXT:    [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP39]], i64 0
11478; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <8 x i1> [[TMP40]], i64 0
11479; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], i64 0, i64 [[_MSPROP16]]
11480; CHECK-NEXT:    [[TMP43:%.*]] = bitcast double [[TMP37]] to i64
11481; CHECK-NEXT:    [[TMP44:%.*]] = bitcast double [[TMP38]] to i64
11482; CHECK-NEXT:    [[TMP45:%.*]] = xor i64 [[TMP43]], [[TMP44]]
11483; CHECK-NEXT:    [[TMP46:%.*]] = or i64 [[TMP45]], 0
11484; CHECK-NEXT:    [[TMP47:%.*]] = or i64 [[TMP46]], [[_MSPROP16]]
11485; CHECK-NEXT:    [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i64 [[TMP47]], i64 [[TMP42]]
11486; CHECK-NEXT:    [[TMP48:%.*]] = select i1 [[TMP41]], double [[TMP37]], double [[TMP38]]
11487; CHECK-NEXT:    [[_MSPROP19:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT18]], i64 0
11488; CHECK-NEXT:    [[TMP49:%.*]] = insertelement <2 x double> [[X2]], double [[TMP48]], i64 0
11489; CHECK-NEXT:    [[_MSPROP20:%.*]] = or <2 x i64> [[_MSPROP7]], [[_MSPROP12]]
11490; CHECK-NEXT:    [[RES3:%.*]] = fadd <2 x double> [[TMP21]], [[TMP30]]
11491; CHECK-NEXT:    [[_MSPROP21:%.*]] = or <2 x i64> [[_MSPROP19]], [[_MSPROP20]]
11492; CHECK-NEXT:    [[RES4:%.*]] = fadd <2 x double> [[TMP49]], [[RES3]]
11493; CHECK-NEXT:    store <2 x i64> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
11494; CHECK-NEXT:    ret <2 x double> [[RES4]]
11495;
; Uninstrumented input IR: fmsub is modeled as fma(x0, x1, -x2); variant 1 is
; masked by bit 0 of %x3, variant 2 is unmasked (rounding 11), variant 3 is
; masked (rounding 10); the three lane-0 results are combined with fadd.
11496  %1 = fneg <2 x double> %x2
11497  %2 = extractelement <2 x double> %x0, i64 0
11498  %3 = extractelement <2 x double> %x1, i64 0
11499  %4 = extractelement <2 x double> %1, i64 0
11500  %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
11501  %6 = extractelement <2 x double> %x2, i64 0
11502  %7 = bitcast i8 %x3 to <8 x i1>
11503  %8 = extractelement <8 x i1> %7, i64 0
11504  %9 = select i1 %8, double %5, double %6
11505  %10 = insertelement <2 x double> %x2, double %9, i64 0
11506  %11 = fneg <2 x double> %x2
11507  %12 = extractelement <2 x double> %x0, i64 0
11508  %13 = extractelement <2 x double> %x1, i64 0
11509  %14 = extractelement <2 x double> %11, i64 0
11510  %15 = call double @llvm.x86.avx512.vfmadd.f64(double %12, double %13, double %14, i32 11)
11511  %16 = extractelement <2 x double> %x2, i64 0
11512  %17 = insertelement <2 x double> %x2, double %15, i64 0
11513  %18 = fneg <2 x double> %x2
11514  %19 = extractelement <2 x double> %x0, i64 0
11515  %20 = extractelement <2 x double> %x1, i64 0
11516  %21 = extractelement <2 x double> %18, i64 0
11517  %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 10)
11518  %23 = extractelement <2 x double> %x2, i64 0
11519  %24 = bitcast i8 %x3 to <8 x i1>
11520  %25 = extractelement <8 x i1> %24, i64 0
11521  %26 = select i1 %25, double %22, double %23
11522  %27 = insertelement <2 x double> %x2, double %26, i64 0
11523  %res3 = fadd <2 x double> %10, %17
11524  %res4 = fadd <2 x double> %27, %res3
11525  ret <2 x double> %res4
11526}
11527
11528define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
; Single-precision counterpart of test_int_x86_avx512_mask3_vfmsub_sd: the
; same three fmsub-as-fma(-x2) variants, but on <4 x float> with i32 shadows.
; CHECK lines verify shadow OR-propagation through @llvm.fma.f32 plus the
; strict operand-shadow checks (__msan_warning_noreturn branches) around the
; two @llvm.x86.avx512.vfmadd.f32 rounding-mode calls.
11529; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmsub_ss(
11530; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
11531; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
11532; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
11533; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
11534; CHECK-NEXT:    call void @llvm.donothing()
11535; CHECK-NEXT:    [[TMP5:%.*]] = fneg <4 x float> [[X2:%.*]]
11536; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
11537; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
11538; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
11539; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
11540; CHECK-NEXT:    [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
11541; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP5]], i64 0
11542; CHECK-NEXT:    [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
11543; CHECK-NEXT:    [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
11544; CHECK-NEXT:    [[TMP9:%.*]] = call float @llvm.fma.f32(float [[TMP6]], float [[TMP7]], float [[TMP8]])
11545; CHECK-NEXT:    [[_MSPROP5:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
11546; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[X2]], i64 0
11547; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
11548; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
11549; CHECK-NEXT:    [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP11]], i64 0
11550; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
11551; CHECK-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[_MSPROP4]], i32 [[_MSPROP5]]
11552; CHECK-NEXT:    [[TMP15:%.*]] = bitcast float [[TMP9]] to i32
11553; CHECK-NEXT:    [[TMP16:%.*]] = bitcast float [[TMP10]] to i32
11554; CHECK-NEXT:    [[TMP17:%.*]] = xor i32 [[TMP15]], [[TMP16]]
11555; CHECK-NEXT:    [[TMP18:%.*]] = or i32 [[TMP17]], [[_MSPROP4]]
11556; CHECK-NEXT:    [[TMP19:%.*]] = or i32 [[TMP18]], [[_MSPROP5]]
11557; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP19]], i32 [[TMP14]]
11558; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[TMP13]], float [[TMP9]], float [[TMP10]]
11559; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT]], i64 0
11560; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x float> [[X2]], float [[TMP20]], i64 0
11561; CHECK-NEXT:    [[TMP22:%.*]] = fneg <4 x float> [[X2]]
11562; CHECK-NEXT:    [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
11563; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x float> [[X0]], i64 0
11564; CHECK-NEXT:    [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
11565; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x float> [[X1]], i64 0
11566; CHECK-NEXT:    [[_MSPROP10:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
11567; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x float> [[TMP22]], i64 0
11568; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP8]], 0
11569; CHECK-NEXT:    [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP9]], 0
11570; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
11571; CHECK-NEXT:    [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP10]], 0
11572; CHECK-NEXT:    [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
11573; CHECK-NEXT:    br i1 [[_MSOR24]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]]
11574; CHECK:       26:
11575; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11576; CHECK-NEXT:    unreachable
11577; CHECK:       27:
11578; CHECK-NEXT:    [[TMP28:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP23]], float [[TMP24]], float [[TMP25]], i32 11)
11579; CHECK-NEXT:    [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
11580; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0
11581; CHECK-NEXT:    [[_MSPROP12:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i64 0
11582; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <4 x float> [[X2]], float [[TMP28]], i64 0
11583; CHECK-NEXT:    [[TMP31:%.*]] = fneg <4 x float> [[X2]]
11584; CHECK-NEXT:    [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
11585; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <4 x float> [[X0]], i64 0
11586; CHECK-NEXT:    [[_MSPROP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
11587; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <4 x float> [[X1]], i64 0
11588; CHECK-NEXT:    [[_MSPROP15:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
11589; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x float> [[TMP31]], i64 0
11590; CHECK-NEXT:    [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
11591; CHECK-NEXT:    [[_MSCMP26:%.*]] = icmp ne i32 [[_MSPROP14]], 0
11592; CHECK-NEXT:    [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
11593; CHECK-NEXT:    [[_MSCMP28:%.*]] = icmp ne i32 [[_MSPROP15]], 0
11594; CHECK-NEXT:    [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
11595; CHECK-NEXT:    br i1 [[_MSOR29]], label [[TMP35:%.*]], label [[TMP36:%.*]], !prof [[PROF1]]
11596; CHECK:       35:
11597; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11598; CHECK-NEXT:    unreachable
11599; CHECK:       36:
11600; CHECK-NEXT:    [[TMP37:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP32]], float [[TMP33]], float [[TMP34]], i32 10)
11601; CHECK-NEXT:    [[_MSPROP16:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
11602; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <4 x float> [[X2]], i64 0
11603; CHECK-NEXT:    [[TMP39:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
11604; CHECK-NEXT:    [[TMP40:%.*]] = bitcast i8 [[X3]] to <8 x i1>
11605; CHECK-NEXT:    [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP39]], i64 0
11606; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <8 x i1> [[TMP40]], i64 0
11607; CHECK-NEXT:    [[TMP42:%.*]] = select i1 [[TMP41]], i32 0, i32 [[_MSPROP16]]
11608; CHECK-NEXT:    [[TMP43:%.*]] = bitcast float [[TMP37]] to i32
11609; CHECK-NEXT:    [[TMP44:%.*]] = bitcast float [[TMP38]] to i32
11610; CHECK-NEXT:    [[TMP45:%.*]] = xor i32 [[TMP43]], [[TMP44]]
11611; CHECK-NEXT:    [[TMP46:%.*]] = or i32 [[TMP45]], 0
11612; CHECK-NEXT:    [[TMP47:%.*]] = or i32 [[TMP46]], [[_MSPROP16]]
11613; CHECK-NEXT:    [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i32 [[TMP47]], i32 [[TMP42]]
11614; CHECK-NEXT:    [[TMP48:%.*]] = select i1 [[TMP41]], float [[TMP37]], float [[TMP38]]
11615; CHECK-NEXT:    [[_MSPROP19:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT18]], i64 0
11616; CHECK-NEXT:    [[TMP49:%.*]] = insertelement <4 x float> [[X2]], float [[TMP48]], i64 0
11617; CHECK-NEXT:    [[_MSPROP20:%.*]] = or <4 x i32> [[_MSPROP7]], [[_MSPROP12]]
11618; CHECK-NEXT:    [[RES3:%.*]] = fadd <4 x float> [[TMP21]], [[TMP30]]
11619; CHECK-NEXT:    [[_MSPROP21:%.*]] = or <4 x i32> [[_MSPROP19]], [[_MSPROP20]]
11620; CHECK-NEXT:    [[RES4:%.*]] = fadd <4 x float> [[TMP49]], [[RES3]]
11621; CHECK-NEXT:    store <4 x i32> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
11622; CHECK-NEXT:    ret <4 x float> [[RES4]]
11623;
; Uninstrumented input IR: three lane-0 fmsub variants (fma with negated x2;
; rounding operands 11 and 10 for the x86 intrinsic calls), combined by fadd.
11624  %1 = fneg <4 x float> %x2
11625  %2 = extractelement <4 x float> %x0, i64 0
11626  %3 = extractelement <4 x float> %x1, i64 0
11627  %4 = extractelement <4 x float> %1, i64 0
11628  %5 = call float @llvm.fma.f32(float %2, float %3, float %4)
11629  %6 = extractelement <4 x float> %x2, i64 0
11630  %7 = bitcast i8 %x3 to <8 x i1>
11631  %8 = extractelement <8 x i1> %7, i64 0
11632  %9 = select i1 %8, float %5, float %6
11633  %10 = insertelement <4 x float> %x2, float %9, i64 0
11634  %11 = fneg <4 x float> %x2
11635  %12 = extractelement <4 x float> %x0, i64 0
11636  %13 = extractelement <4 x float> %x1, i64 0
11637  %14 = extractelement <4 x float> %11, i64 0
11638  %15 = call float @llvm.x86.avx512.vfmadd.f32(float %12, float %13, float %14, i32 11)
11639  %16 = extractelement <4 x float> %x2, i64 0
11640  %17 = insertelement <4 x float> %x2, float %15, i64 0
11641  %18 = fneg <4 x float> %x2
11642  %19 = extractelement <4 x float> %x0, i64 0
11643  %20 = extractelement <4 x float> %x1, i64 0
11644  %21 = extractelement <4 x float> %18, i64 0
11645  %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 10)
11646  %23 = extractelement <4 x float> %x2, i64 0
11647  %24 = bitcast i8 %x3 to <8 x i1>
11648  %25 = extractelement <8 x i1> %24, i64 0
11649  %26 = select i1 %25, float %22, float %23
11650  %27 = insertelement <4 x float> %x2, float %26, i64 0
11651  %res3 = fadd <4 x float> %10, %17
11652  %res4 = fadd <4 x float> %27, %res3
11653  ret <4 x float> %res4
11654}
11655
11656define <2 x double> @test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
; mask3 scalar vfnmsub (double): like the vfmsub_sd test above, but *both*
; %x0 (multiplicand) and %x2 (addend) are negated with fneg before the fma,
; i.e. fnmsub = fma(-x0, x1, -x2).  The CHECK lines verify that the fneg pairs
; appear in the instrumented output and that shadow handling is otherwise the
; same: OR-propagation through @llvm.fma.f64, strict operand-shadow checks
; (__msan_warning_noreturn) around the two x86 rounding-mode intrinsic calls.
11657; CHECK-LABEL: @test_int_x86_avx512_mask3_vfnmsub_sd(
11658; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
11659; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
11660; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
11661; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
11662; CHECK-NEXT:    call void @llvm.donothing()
11663; CHECK-NEXT:    [[TMP5:%.*]] = fneg <2 x double> [[X0:%.*]]
11664; CHECK-NEXT:    [[TMP6:%.*]] = fneg <2 x double> [[X2:%.*]]
11665; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
11666; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 0
11667; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
11668; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
11669; CHECK-NEXT:    [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
11670; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP6]], i64 0
11671; CHECK-NEXT:    [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
11672; CHECK-NEXT:    [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
11673; CHECK-NEXT:    [[TMP10:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP9]])
11674; CHECK-NEXT:    [[_MSPROP5:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
11675; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x double> [[X2]], i64 0
11676; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
11677; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
11678; CHECK-NEXT:    [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
11679; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP13]], i64 0
11680; CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[_MSPROP4]], i64 [[_MSPROP5]]
11681; CHECK-NEXT:    [[TMP16:%.*]] = bitcast double [[TMP10]] to i64
11682; CHECK-NEXT:    [[TMP17:%.*]] = bitcast double [[TMP11]] to i64
11683; CHECK-NEXT:    [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
11684; CHECK-NEXT:    [[TMP19:%.*]] = or i64 [[TMP18]], [[_MSPROP4]]
11685; CHECK-NEXT:    [[TMP20:%.*]] = or i64 [[TMP19]], [[_MSPROP5]]
11686; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i64 [[TMP20]], i64 [[TMP15]]
11687; CHECK-NEXT:    [[TMP21:%.*]] = select i1 [[TMP14]], double [[TMP10]], double [[TMP11]]
11688; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[_MSPROP_SELECT]], i64 0
11689; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <2 x double> [[X2]], double [[TMP21]], i64 0
11690; CHECK-NEXT:    [[TMP23:%.*]] = fneg <2 x double> [[X0]]
11691; CHECK-NEXT:    [[TMP24:%.*]] = fneg <2 x double> [[X2]]
11692; CHECK-NEXT:    [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
11693; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <2 x double> [[TMP23]], i64 0
11694; CHECK-NEXT:    [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
11695; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <2 x double> [[X1]], i64 0
11696; CHECK-NEXT:    [[_MSPROP10:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
11697; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <2 x double> [[TMP24]], i64 0
11698; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP8]], 0
11699; CHECK-NEXT:    [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP9]], 0
11700; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
11701; CHECK-NEXT:    [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP10]], 0
11702; CHECK-NEXT:    [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
11703; CHECK-NEXT:    br i1 [[_MSOR24]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
11704; CHECK:       28:
11705; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11706; CHECK-NEXT:    unreachable
11707; CHECK:       29:
11708; CHECK-NEXT:    [[TMP30:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP25]], double [[TMP26]], double [[TMP27]], i32 11)
11709; CHECK-NEXT:    [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
11710; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <2 x double> [[X2]], i64 0
11711; CHECK-NEXT:    [[_MSPROP12:%.*]] = insertelement <2 x i64> [[TMP2]], i64 0, i64 0
11712; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <2 x double> [[X2]], double [[TMP30]], i64 0
11713; CHECK-NEXT:    [[TMP33:%.*]] = fneg <2 x double> [[X0]]
11714; CHECK-NEXT:    [[TMP34:%.*]] = fneg <2 x double> [[X2]]
11715; CHECK-NEXT:    [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
11716; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <2 x double> [[TMP33]], i64 0
11717; CHECK-NEXT:    [[_MSPROP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
11718; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <2 x double> [[X1]], i64 0
11719; CHECK-NEXT:    [[_MSPROP15:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
11720; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x double> [[TMP34]], i64 0
11721; CHECK-NEXT:    [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
11722; CHECK-NEXT:    [[_MSCMP26:%.*]] = icmp ne i64 [[_MSPROP14]], 0
11723; CHECK-NEXT:    [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
11724; CHECK-NEXT:    [[_MSCMP28:%.*]] = icmp ne i64 [[_MSPROP15]], 0
11725; CHECK-NEXT:    [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
11726; CHECK-NEXT:    br i1 [[_MSOR29]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]]
11727; CHECK:       38:
11728; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
11729; CHECK-NEXT:    unreachable
11730; CHECK:       39:
11731; CHECK-NEXT:    [[TMP40:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP35]], double [[TMP36]], double [[TMP37]], i32 10)
11732; CHECK-NEXT:    [[_MSPROP16:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
11733; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <2 x double> [[X2]], i64 0
11734; CHECK-NEXT:    [[TMP42:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
11735; CHECK-NEXT:    [[TMP43:%.*]] = bitcast i8 [[X3]] to <8 x i1>
11736; CHECK-NEXT:    [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP42]], i64 0
11737; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <8 x i1> [[TMP43]], i64 0
11738; CHECK-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], i64 0, i64 [[_MSPROP16]]
11739; CHECK-NEXT:    [[TMP46:%.*]] = bitcast double [[TMP40]] to i64
11740; CHECK-NEXT:    [[TMP47:%.*]] = bitcast double [[TMP41]] to i64
11741; CHECK-NEXT:    [[TMP48:%.*]] = xor i64 [[TMP46]], [[TMP47]]
11742; CHECK-NEXT:    [[TMP49:%.*]] = or i64 [[TMP48]], 0
11743; CHECK-NEXT:    [[TMP50:%.*]] = or i64 [[TMP49]], [[_MSPROP16]]
11744; CHECK-NEXT:    [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i64 [[TMP50]], i64 [[TMP45]]
11745; CHECK-NEXT:    [[TMP51:%.*]] = select i1 [[TMP44]], double [[TMP40]], double [[TMP41]]
11746; CHECK-NEXT:    [[_MSPROP19:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[_MSPROP_SELECT18]], i64 0
11747; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <2 x double> [[X2]], double [[TMP51]], i64 0
11748; CHECK-NEXT:    [[_MSPROP20:%.*]] = or <2 x i64> [[_MSPROP7]], [[_MSPROP12]]
11749; CHECK-NEXT:    [[RES3:%.*]] = fadd <2 x double> [[TMP22]], [[TMP32]]
11750; CHECK-NEXT:    [[_MSPROP21:%.*]] = or <2 x i64> [[_MSPROP19]], [[_MSPROP20]]
11751; CHECK-NEXT:    [[RES4:%.*]] = fadd <2 x double> [[TMP52]], [[RES3]]
11752; CHECK-NEXT:    store <2 x i64> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
11753; CHECK-NEXT:    ret <2 x double> [[RES4]]
11754;
; Uninstrumented input IR: fnmsub modeled as fma(-x0, x1, -x2); variant 1 is
; masked by bit 0 of %x3, variant 2 unmasked (rounding 11), variant 3 masked
; (rounding 10); the three lane-0 results are combined with fadd.
11755  %1 = fneg <2 x double> %x0
11756  %2 = fneg <2 x double> %x2
11757  %3 = extractelement <2 x double> %1, i64 0
11758  %4 = extractelement <2 x double> %x1, i64 0
11759  %5 = extractelement <2 x double> %2, i64 0
11760  %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
11761  %7 = extractelement <2 x double> %x2, i64 0
11762  %8 = bitcast i8 %x3 to <8 x i1>
11763  %9 = extractelement <8 x i1> %8, i64 0
11764  %10 = select i1 %9, double %6, double %7
11765  %11 = insertelement <2 x double> %x2, double %10, i64 0
11766  %12 = fneg <2 x double> %x0
11767  %13 = fneg <2 x double> %x2
11768  %14 = extractelement <2 x double> %12, i64 0
11769  %15 = extractelement <2 x double> %x1, i64 0
11770  %16 = extractelement <2 x double> %13, i64 0
11771  %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 11)
11772  %18 = extractelement <2 x double> %x2, i64 0
11773  %19 = insertelement <2 x double> %x2, double %17, i64 0
11774  %20 = fneg <2 x double> %x0
11775  %21 = fneg <2 x double> %x2
11776  %22 = extractelement <2 x double> %20, i64 0
11777  %23 = extractelement <2 x double> %x1, i64 0
11778  %24 = extractelement <2 x double> %21, i64 0
11779  %25 = call double @llvm.x86.avx512.vfmadd.f64(double %22, double %23, double %24, i32 10)
11780  %26 = extractelement <2 x double> %x2, i64 0
11781  %27 = bitcast i8 %x3 to <8 x i1>
11782  %28 = extractelement <8 x i1> %27, i64 0
11783  %29 = select i1 %28, double %25, double %26
11784  %30 = insertelement <2 x double> %x2, double %29, i64 0
11785  %res3 = fadd <2 x double> %11, %19
11786  %res4 = fadd <2 x double> %30, %res3
11787  ret <2 x double> %res4
11788}
11789
; MSan shadow propagation for a scalar fnmsub sequence (fneg of %x0/%x2 feeding
; an fma on element 0), computed three ways: via llvm.fma.f32, and via
; llvm.x86.avx512.vfmadd.f32 with rounding-mode operands i32 11 and i32 10.
; Results are merged into %x2 under bit 0 of the i8 mask %x3 and summed.
; CHECK lines are autogenerated by update_test_checks.py.
define <4 x float> @test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask3_vfnmsub_ss(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = fneg <4 x float> [[X0:%.*]]
; CHECK-NEXT:    [[TMP6:%.*]] = fneg <4 x float> [[X2:%.*]]
; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP5]], i64 0
; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i64 0
; CHECK-NEXT:    [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
; CHECK-NEXT:    [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
; CHECK-NEXT:    [[TMP10:%.*]] = call float @llvm.fma.f32(float [[TMP7]], float [[TMP8]], float [[TMP9]])
; CHECK-NEXT:    [[_MSPROP5:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[X2]], i64 0
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
; CHECK-NEXT:    [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP13]], i64 0
; CHECK-NEXT:    [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[_MSPROP4]], i32 [[_MSPROP5]]
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast float [[TMP11]] to i32
; CHECK-NEXT:    [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = or i32 [[TMP18]], [[_MSPROP4]]
; CHECK-NEXT:    [[TMP20:%.*]] = or i32 [[TMP19]], [[_MSPROP5]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP20]], i32 [[TMP15]]
; CHECK-NEXT:    [[TMP21:%.*]] = select i1 [[TMP14]], float [[TMP10]], float [[TMP11]]
; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> [[X2]], float [[TMP21]], i64 0
; CHECK-NEXT:    [[TMP23:%.*]] = fneg <4 x float> [[X0]]
; CHECK-NEXT:    [[TMP24:%.*]] = fneg <4 x float> [[X2]]
; CHECK-NEXT:    [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x float> [[TMP23]], i64 0
; CHECK-NEXT:    [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x float> [[X1]], i64 0
; CHECK-NEXT:    [[_MSPROP10:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x float> [[TMP24]], i64 0
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP8]], 0
; CHECK-NEXT:    [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP9]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
; CHECK-NEXT:    [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP10]], 0
; CHECK-NEXT:    [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
; CHECK-NEXT:    br i1 [[_MSOR24]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
; CHECK:       28:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       29:
; CHECK-NEXT:    [[TMP30:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP25]], float [[TMP26]], float [[TMP27]], i32 11)
; CHECK-NEXT:    [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <4 x float> [[X2]], i64 0
; CHECK-NEXT:    [[_MSPROP12:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i64 0
; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x float> [[X2]], float [[TMP30]], i64 0
; CHECK-NEXT:    [[TMP33:%.*]] = fneg <4 x float> [[X0]]
; CHECK-NEXT:    [[TMP34:%.*]] = fneg <4 x float> [[X2]]
; CHECK-NEXT:    [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <4 x float> [[TMP33]], i64 0
; CHECK-NEXT:    [[_MSPROP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP36:%.*]] = extractelement <4 x float> [[X1]], i64 0
; CHECK-NEXT:    [[_MSPROP15:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <4 x float> [[TMP34]], i64 0
; CHECK-NEXT:    [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
; CHECK-NEXT:    [[_MSCMP26:%.*]] = icmp ne i32 [[_MSPROP14]], 0
; CHECK-NEXT:    [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
; CHECK-NEXT:    [[_MSCMP28:%.*]] = icmp ne i32 [[_MSPROP15]], 0
; CHECK-NEXT:    [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
; CHECK-NEXT:    br i1 [[_MSOR29]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]]
; CHECK:       38:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       39:
; CHECK-NEXT:    [[TMP40:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP35]], float [[TMP36]], float [[TMP37]], i32 10)
; CHECK-NEXT:    [[_MSPROP16:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <4 x float> [[X2]], i64 0
; CHECK-NEXT:    [[TMP42:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT:    [[TMP43:%.*]] = bitcast i8 [[X3]] to <8 x i1>
; CHECK-NEXT:    [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP42]], i64 0
; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <8 x i1> [[TMP43]], i64 0
; CHECK-NEXT:    [[TMP45:%.*]] = select i1 [[TMP44]], i32 0, i32 [[_MSPROP16]]
; CHECK-NEXT:    [[TMP46:%.*]] = bitcast float [[TMP40]] to i32
; CHECK-NEXT:    [[TMP47:%.*]] = bitcast float [[TMP41]] to i32
; CHECK-NEXT:    [[TMP48:%.*]] = xor i32 [[TMP46]], [[TMP47]]
; CHECK-NEXT:    [[TMP49:%.*]] = or i32 [[TMP48]], 0
; CHECK-NEXT:    [[TMP50:%.*]] = or i32 [[TMP49]], [[_MSPROP16]]
; CHECK-NEXT:    [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i32 [[TMP50]], i32 [[TMP45]]
; CHECK-NEXT:    [[TMP51:%.*]] = select i1 [[TMP44]], float [[TMP40]], float [[TMP41]]
; CHECK-NEXT:    [[_MSPROP19:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT18]], i64 0
; CHECK-NEXT:    [[TMP52:%.*]] = insertelement <4 x float> [[X2]], float [[TMP51]], i64 0
; CHECK-NEXT:    [[_MSPROP20:%.*]] = or <4 x i32> [[_MSPROP7]], [[_MSPROP12]]
; CHECK-NEXT:    [[RES3:%.*]] = fadd <4 x float> [[TMP22]], [[TMP32]]
; CHECK-NEXT:    [[_MSPROP21:%.*]] = or <4 x i32> [[_MSPROP19]], [[_MSPROP20]]
; CHECK-NEXT:    [[RES4:%.*]] = fadd <4 x float> [[TMP52]], [[RES3]]
; CHECK-NEXT:    store <4 x i32> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[RES4]]
;
  ; Variant 1: plain llvm.fma.f32, masked merge into %x2 via bit 0 of %x3.
  %1 = fneg <4 x float> %x0
  %2 = fneg <4 x float> %x2
  %3 = extractelement <4 x float> %1, i64 0
  %4 = extractelement <4 x float> %x1, i64 0
  %5 = extractelement <4 x float> %2, i64 0
  %6 = call float @llvm.fma.f32(float %3, float %4, float %5)
  %7 = extractelement <4 x float> %x2, i64 0
  %8 = bitcast i8 %x3 to <8 x i1>
  %9 = extractelement <8 x i1> %8, i64 0
  %10 = select i1 %9, float %6, float %7
  %11 = insertelement <2 x double> undef, double undef, i64 0 ; NOTE(review): placeholder never used
  %12 = fneg <4 x float> %x0
  %13 = fneg <4 x float> %x2
  %14 = extractelement <4 x float> %12, i64 0
  %15 = extractelement <4 x float> %x1, i64 0
  %16 = extractelement <4 x float> %13, i64 0
  %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 11)
  %18 = extractelement <4 x float> %x2, i64 0
  %19 = insertelement <4 x float> %x2, float %17, i64 0
  %20 = fneg <4 x float> %x0
  %21 = fneg <4 x float> %x2
  %22 = extractelement <4 x float> %20, i64 0
  %23 = extractelement <4 x float> %x1, i64 0
  %24 = extractelement <4 x float> %21, i64 0
  %25 = call float @llvm.x86.avx512.vfmadd.f32(float %22, float %23, float %24, i32 10)
  %26 = extractelement <4 x float> %x2, i64 0
  %27 = bitcast i8 %x3 to <8 x i1>
  %28 = extractelement <8 x i1> %27, i64 0
  %29 = select i1 %28, float %25, float %26
  %30 = insertelement <4 x float> %x2, float %29, i64 0
  %res3 = fadd <4 x float> %11, %19
  %res4 = fadd <4 x float> %30, %res3
  ret <4 x float> %res4
}
11923
; MSan handling of a masked scalar fma whose second operand element is loaded
; from memory (%ptr_b); the mask3 form merges the result into %x1. The CHECK
; lines verify the %ptr_b shadow check plus the shadow load through the
; xor-with-87960930222080 address mapping. Autogenerated by update_test_checks.py.
define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr%ptr_b ,i8 %x3,i32 %x4, <4 x float> %extra_param) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_ss_rm(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP25:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64
; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[_MSLD]], i32 0
; CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x float> [[EXTRA_PARAM:%.*]], float [[Q]], i32 0
; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP2:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0
; CHECK-NEXT:    [[_MSPROP3:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]]
; CHECK-NEXT:    [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]]
; CHECK-NEXT:    [[TMP13:%.*]] = call float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]])
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
; CHECK-NEXT:    [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP14]], i64 0
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0
; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[_MSPROP5]], i32 [[_MSPROP3]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast float [[TMP13]] to i32
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast float [[TMP12]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = or i32 [[TMP20]], [[_MSPROP5]]
; CHECK-NEXT:    [[TMP22:%.*]] = or i32 [[TMP21]], [[_MSPROP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP22]], i32 [[TMP17]]
; CHECK-NEXT:    [[TMP23:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP12]]
; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT]], i64 0
; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[X1]], float [[TMP23]], i64 0
; CHECK-NEXT:    store <4 x i32> [[_MSPROP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[TMP24]]
;
  %q = load float, ptr %ptr_b
  %vecinit.i = insertelement <4 x float> %extra_param, float %q, i32 0
  %1 = extractelement <4 x float> %x0, i64 0
  %2 = extractelement <4 x float> %vecinit.i, i64 0
  %3 = extractelement <4 x float> %x1, i64 0
  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, float %4, float %3
  %8 = insertelement <4 x float> %x1, float %7, i64 0
  ret <4 x float> %8
}
11983
; Same memory-operand scalar fma as the mask3 test above, but the mask form
; merges the result into %x0 (select falls back to element 0 of %x0, and the
; final insertelement targets %x0). Autogenerated by update_test_checks.py.
define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,ptr%ptr_b ,i8 %x3,i32 %x4, <4 x float> %extra_param) #0 {
; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_ss_rm(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP25:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64
; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[_MSLD]], i32 0
; CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x float> [[EXTRA_PARAM:%.*]], float [[Q]], i32 0
; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP2:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0
; CHECK-NEXT:    [[_MSPROP3:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]]
; CHECK-NEXT:    [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]]
; CHECK-NEXT:    [[TMP13:%.*]] = call float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]])
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
; CHECK-NEXT:    [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP14]], i64 0
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0
; CHECK-NEXT:    [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[_MSPROP5]], i32 [[_MSPROP1]]
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast float [[TMP13]] to i32
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast float [[TMP10]] to i32
; CHECK-NEXT:    [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = or i32 [[TMP20]], [[_MSPROP5]]
; CHECK-NEXT:    [[TMP22:%.*]] = or i32 [[TMP21]], [[_MSPROP1]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP22]], i32 [[TMP17]]
; CHECK-NEXT:    [[TMP23:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP10]]
; CHECK-NEXT:    [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0
; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[X0]], float [[TMP23]], i64 0
; CHECK-NEXT:    store <4 x i32> [[_MSPROP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[TMP24]]
;
  %q = load float, ptr %ptr_b
  %vecinit.i = insertelement <4 x float> %extra_param, float %q, i32 0
  %1 = extractelement <4 x float> %x0, i64 0
  %2 = extractelement <4 x float> %vecinit.i, i64 0
  %3 = extractelement <4 x float> %x1, i64 0
  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
  %5 = bitcast i8 %x3 to <8 x i1>
  %6 = extractelement <8 x i1> %5, i64 0
  %7 = select i1 %6, float %4, float %1
  %8 = insertelement <4 x float> %x0, float %7, i64 0
  ret <4 x float> %8
}
12043
12044
; Zero-masked variant: the IR selects on a constant-false condition, so MSan
; instruments the shadow select with the same literal `i1 false` and the value
; select picks the 0.0 fallback. Autogenerated by update_test_checks.py.
define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,ptr%ptr_b ,i8 %x3,i32 %x4, <4 x float> %extra_param) #0 {
; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss_rm(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[_MSLD]], i32 0
; CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x float> [[EXTRA_PARAM:%.*]], float [[Q]], i32 0
; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
; CHECK-NEXT:    [[_MSPROP3:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0
; CHECK-NEXT:    [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]]
; CHECK-NEXT:    [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]]
; CHECK-NEXT:    [[TMP12:%.*]] = call float @llvm.fma.f32(float [[TMP9]], float [[TMP10]], float [[TMP11]])
; CHECK-NEXT:    [[TMP13:%.*]] = select i1 false, i32 [[_MSPROP5]], i32 0
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast float [[TMP12]] to i32
; CHECK-NEXT:    [[TMP15:%.*]] = xor i32 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP5]]
; CHECK-NEXT:    [[TMP17:%.*]] = or i32 [[TMP16]], 0
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 false, i32 [[TMP17]], i32 [[TMP13]]
; CHECK-NEXT:    [[TMP18:%.*]] = select i1 false, float [[TMP12]], float 0.000000e+00
; CHECK-NEXT:    [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x float> [[X0]], float [[TMP18]], i64 0
; CHECK-NEXT:    store <4 x i32> [[_MSPROP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x float> [[TMP19]]
;
  %q = load float, ptr %ptr_b
  %vecinit.i = insertelement <4 x float> %extra_param, float %q, i32 0
  %1 = extractelement <4 x float> %x0, i64 0
  %2 = extractelement <4 x float> %x1, i64 0
  %3 = extractelement <4 x float> %vecinit.i, i64 0
  %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
  %5 = select i1 false, float %4, float 0.000000e+00
  %6 = insertelement <4 x float> %x0, float %5, i64 0
  ret <4 x float> %6
}
12096
; MSan shadow for psll.d.512: the shift-amount shadow (<4 x i32>) is truncated
; to its low 64 bits, compared against zero, and the resulting bit is
; sign-extended across the full 512-bit result shadow, which is OR'ed with the
; shifted data shadow. Autogenerated by update_test_checks.py.
define <16 x i32> @test_x86_avx512_psll_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psll_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32>
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
; Masked psll.d.512: on top of the shift-shadow handling, the CHECK lines
; verify the select-based shadow merge for the i16 mask (both the mask's own
; shadow and the xor/or uncertainty propagation into %passthru's shadow).
; Autogenerated by update_test_checks.py.
define <16 x i32> @test_x86_avx512_mask_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psll_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT:    [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32>
; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
  ret <16 x i32> %res2
}
; Zero-masked psll.d.512: same shadow select as the masked form, with
; zeroinitializer as the fallback value/shadow instead of a passthru operand.
; Autogenerated by update_test_checks.py.
define <16 x i32> @test_x86_avx512_maskz_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psll_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32>
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}
12177declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) nounwind readnone
12178
12179
; MSan shadow for psll.q.512: identical scheme to the psll.d.512 test above,
; with <8 x i64> data and a <2 x i64> shift-amount operand.
; Autogenerated by update_test_checks.py.
define <8 x i64> @test_x86_avx512_psll_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psll_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT:    store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
; Masked psll.q.512: i8 mask merging into %passthru, with the same
; select/xor/or shadow-merge pattern as the masked psll.d.512 test.
; Autogenerated by update_test_checks.py.
define <8 x i64> @test_x86_avx512_mask_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psll_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT:    [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64>
; CHECK-NEXT:    [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]]
; CHECK-NEXT:    [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
  ret <8 x i64> %res2
}
12230define <8 x i64> @test_x86_avx512_maskz_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 {
12231; CHECK-LABEL: @test_x86_avx512_maskz_psll_q_512(
12232; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
12233; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
12234; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
12235; CHECK-NEXT:    call void @llvm.donothing()
12236; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
12237; CHECK-NEXT:    [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
12238; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
12239; CHECK-NEXT:    [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
12240; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64>
12241; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
12242; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]]
12243; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
12244; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
12245; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
12246; CHECK-NEXT:    [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
12247; CHECK-NEXT:    [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
12248; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]]
12249; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
12250; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
12251; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
12252; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
12253; CHECK-NEXT:    ret <8 x i64> [[RES2]]
12254;
12255  %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
12256  %mask.cast = bitcast i8 %mask to <8 x i1>
12257  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
12258  ret <8 x i64> %res2
12259}
12260declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) nounwind readnone
12261
12262
; MSan handling of the immediate left-shift @llvm.x86.avx512.pslli.d.512:
; the shift count is a constant i32, so there is no shift-amount shadow to
; collapse — the data shadow is shifted by the same immediate and OR'd with
; zeroinitializer. Masked variants route the shadow through the mask select.
define <16 x i32> @test_x86_avx512_pslli_d_512(<16 x i32> %a0) #0 {
; CHECK-LABEL: @test_x86_avx512_pslli_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES]]
;
  %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
; Writethrough-masked variant: unselected lanes take %passthru and its shadow.
define <16 x i32> @test_x86_avx512_mask_pslli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_pslli_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
  ret <16 x i32> %res2
}
; Zeroing-masked variant: unselected lanes are zero with a zero shadow.
define <16 x i32> @test_x86_avx512_maskz_pslli_d_512(<16 x i32> %a0, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_pslli_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) nounwind readnone


; MSan handling of the immediate left-shift @llvm.x86.avx512.pslli.q.512
; (i64 lanes): constant shift count, so the data shadow is shifted by the
; same immediate; masked variants merge the shadow through the mask select.
define <8 x i64> @test_x86_avx512_pslli_q_512(<8 x i64> %a0) #0 {
; CHECK-LABEL: @test_x86_avx512_pslli_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES]]
;
  %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
; Writethrough-masked variant: unselected lanes take %passthru and its shadow.
define <8 x i64> @test_x86_avx512_mask_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_pslli_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
  ret <8 x i64> %res2
}
; Zeroing-masked variant (note: %passthru is accepted but unused here).
define <8 x i64> @test_x86_avx512_maskz_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_pslli_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) nounwind readnone


; MSan handling of the variable arithmetic right-shift
; @llvm.x86.avx512.psra.q.512: the <2 x i64> shift-amount shadow is collapsed
; (trunc/icmp/sext) to an all-ones <8 x i64> if any bit is uninitialized and
; OR'd into the shifted data shadow; masked variants add the shadow select.
define <8 x i64> @test_x86_avx512_psra_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psra_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT:    store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
; Writethrough-masked variant: unselected lanes take %passthru and its shadow.
define <8 x i64> @test_x86_avx512_mask_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psra_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT:    [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64>
; CHECK-NEXT:    [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]]
; CHECK-NEXT:    [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
  ret <8 x i64> %res2
}
; Zeroing-masked variant: unselected lanes are zero with a zero shadow.
define <8 x i64> @test_x86_avx512_maskz_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psra_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64>
; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) nounwind readnone


; MSan handling of the variable arithmetic right-shift
; @llvm.x86.avx512.psra.d.512 (i32 lanes): same shadow scheme as psra.q —
; collapse the <4 x i32> shift-amount shadow, OR into the shifted data shadow,
; and for masked variants merge through the mask select.
define <16 x i32> @test_x86_avx512_psra_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psra_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32>
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
; Writethrough-masked variant: unselected lanes take %passthru and its shadow.
define <16 x i32> @test_x86_avx512_mask_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psra_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT:    [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32>
; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
  ret <16 x i32> %res2
}
; Zeroing-masked variant: unselected lanes are zero with a zero shadow.
define <16 x i32> @test_x86_avx512_maskz_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psra_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32>
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) nounwind readnone



; MSan handling of the immediate arithmetic right-shift
; @llvm.x86.avx512.psrai.q.512: constant shift count, so the data shadow is
; shifted by the same immediate; masked variants add the shadow select.
define <8 x i64> @test_x86_avx512_psrai_q_512(<8 x i64> %a0) #0 {
; CHECK-LABEL: @test_x86_avx512_psrai_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
; Writethrough-masked variant: unselected lanes take %passthru and its shadow.
define <8 x i64> @test_x86_avx512_mask_psrai_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psrai_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
  ret <8 x i64> %res2
}
; Zeroing-masked variant: unselected lanes are zero with a zero shadow.
define <8 x i64> @test_x86_avx512_maskz_psrai_q_512(<8 x i64> %a0, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psrai_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) nounwind readnone


; MSan handling of the immediate arithmetic right-shift
; @llvm.x86.avx512.psrai.d.512 (i32 lanes): constant shift count, so the data
; shadow is shifted by the same immediate and OR'd with zeroinitializer.
define <16 x i32> @test_x86_avx512_psrai_d_512(<16 x i32> %a0) #0 {
; CHECK-LABEL: @test_x86_avx512_psrai_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
; Writethrough-masked variant: unselected lanes take %passthru and its shadow.
define <16 x i32> @test_x86_avx512_mask_psrai_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psrai_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
  ret <16 x i32> %res2
}
; MSan, zero-masked psrai.d: same as the merge-masked case but with a
; zeroinitializer passthru, whose shadow is the all-zero vector.
define <16 x i32> @test_x86_avx512_maskz_psrai_d_512(<16 x i32> %a0, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psrai_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}
12687declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) nounwind readnone
12688
12689
12690
; MSan, unmasked psrl.d (shift by xmm count): if any of the low 64 bits of the
; count shadow are set, the entire result shadow is poisoned (sext of the icmp);
; the data shadow is additionally shifted by the same count.
define <16 x i32> @test_x86_avx512_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psrl_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32>
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
; MSan, merge-masked psrl.d: the unmasked shadow computation above, followed by
; the standard select-shadow merge with the %passthru and %mask shadows.
define <16 x i32> @test_x86_avx512_mask_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psrl_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT:    [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32>
; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
  ret <16 x i32> %res2
}
; MSan, zero-masked psrl.d: as the merge-masked case with a zeroinitializer
; passthru and an all-zero passthru shadow.
define <16 x i32> @test_x86_avx512_maskz_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psrl_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32>
; CHECK-NEXT:    [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}
12771declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) nounwind readnone
12772
12773
; MSan, unmasked psrl.q (shift by xmm count): same propagation scheme as
; psrl.d — a non-zero low-64-bit count shadow poisons the whole result shadow.
define <8 x i64> @test_x86_avx512_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psrl_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT:    store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
; MSan, merge-masked psrl.q: unmasked psrl.q shadow propagation plus the
; select-shadow merge with the %passthru and i8 %mask shadows.
define <8 x i64> @test_x86_avx512_mask_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psrl_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
; CHECK-NEXT:    [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64>
; CHECK-NEXT:    [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]]
; CHECK-NEXT:    [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
  ret <8 x i64> %res2
}
; MSan, zero-masked psrl.q: as the merge-masked case with a zeroinitializer
; passthru and an all-zero passthru shadow.
define <8 x i64> @test_x86_avx512_maskz_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psrl_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64>
; CHECK-NEXT:    [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}
12854declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) nounwind readnone
12855
12856
; MSan, unmasked psrli.d (shift by immediate): the %a0 shadow is shifted by the
; same immediate and stored to the retval TLS slot.
define <16 x i32> @test_x86_avx512_psrli_d_512(<16 x i32> %a0) #0 {
; CHECK-LABEL: @test_x86_avx512_psrli_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
; MSan, merge-masked psrli.d: shifted %a0 shadow merged with the %passthru and
; %mask shadows through the select-shadow pattern (_MSPROP_SELECT).
define <16 x i32> @test_x86_avx512_mask_psrli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psrli_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
  ret <16 x i32> %res2
}
; MSan, zero-masked psrli.d: as the merge-masked case with a zeroinitializer
; passthru and an all-zero passthru shadow.
define <16 x i32> @test_x86_avx512_maskz_psrli_d_512(<16 x i32> %a0, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psrli_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}
12919declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) nounwind readnone
12920
12921
; MSan, unmasked psrli.q (shift by immediate): the %a0 shadow is shifted by the
; same immediate and stored to the retval TLS slot.
define <8 x i64> @test_x86_avx512_psrli_q_512(<8 x i64> %a0) #0 {
; CHECK-LABEL: @test_x86_avx512_psrli_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
; MSan, merge-masked psrli.q: shifted %a0 shadow merged with the %passthru and
; i8 %mask shadows through the select-shadow pattern.
define <8 x i64> @test_x86_avx512_mask_psrli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psrli_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]]
; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
  ret <8 x i64> %res2
}
; MSan, zero-masked psrli.q: as the merge-masked case with a zeroinitializer
; passthru and an all-zero passthru shadow.
define <8 x i64> @test_x86_avx512_maskz_psrli_q_512(<8 x i64> %a0, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psrli_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]]
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}
12984declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) nounwind readnone
12985
; MSan, unmasked psllv.d (per-element variable shift): each element whose count
; shadow is non-zero poisons the corresponding result element (per-lane sext),
; and the data shadow is shifted element-wise by the same counts.
define <16 x i32> @test_x86_avx512_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psllv_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
; CHECK-NEXT:    store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  ret <16 x i32> %res
}
13002
; MSan, psllv.d with constant operands (including out-of-range and negative
; shift counts): all shadows are zero-based, so the result shadow stays clean.
define <16 x i32> @test_x86_avx512_psllv_d_512_const() #0 {
; CHECK-LABEL: @test_x86_avx512_psllv_d_512_const(
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
; CHECK-NEXT:    [[TMP2:%.*]] = or <16 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[RES0:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
; CHECK-NEXT:    [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[RES2:%.*]] = add <16 x i32> [[RES0]], [[RES1]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res0 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
  %res1 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1,  i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,  i32 1, i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <16 x i32> %res0, %res1
  ret <16 x i32> %res2
}
13022
; MSan, merge-masked psllv.d: per-lane variable-shift shadow propagation
; followed by the select-shadow merge with the %a2 passthru and %mask shadows.
define <16 x i32> @test_x86_avx512_mask_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psllv_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]]
; CHECK-NEXT:    [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
  ret <16 x i32> %res2
}
13051
; MSan, zero-masked psllv.d: as the merge-masked case with a zeroinitializer
; passthru and an all-zero passthru shadow.
define <16 x i32> @test_x86_avx512_maskz_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psllv_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}
13079
13080declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) nounwind readnone
13081
; MSan: unmasked variable left shift (vpsllvq). Result shadow is the intrinsic
; applied to %a0's shadow, OR'd with the lane-collapsed shift-amount shadow.
define <8 x i64> @test_x86_avx512_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psllv_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
; CHECK-NEXT:    store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  ret <8 x i64> %res
}
13098
; MSan: vpsllvq with constant operands — all shadows are zeroinitializer, so the
; propagated shadow stays fully initialized (the or/add chain folds to zero).
define <8 x i64> @test_x86_avx512_psllv_q_512_const() #0 {
; CHECK-LABEL: @test_x86_avx512_psllv_q_512_const(
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
; CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[RES0:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
; CHECK-NEXT:    [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[RES1:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[RES2:%.*]] = add <8 x i64> [[RES0]], [[RES1]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res0 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1,i64 2, i64 0, i64 34, i64 -2>)
  %res1 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1,  i64 1, i64 1, i64 1, i64 -1>)
  %res2 = add <8 x i64> %res0, %res1
  ret <8 x i64> %res2
}
13118
; MSan: merge-masked vpsllvq. Shift shadow as in the unmasked case; the final
; select-shadow combines both arms' shadows plus xor(RES, A2) under the
; uninitialized mask bits (TMP9) before merging with the concrete-mask select.
define <8 x i64> @test_x86_avx512_mask_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psllv_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
; CHECK-NEXT:    [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]]
; CHECK-NEXT:    [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
  ret <8 x i64> %res2
}
13147
; MSan: zero-masked vpsllvq — like the merge-masked case but the false arm and
; its shadow are zeroinitializer (fully initialized).
define <8 x i64> @test_x86_avx512_maskz_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psllv_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}
13175
13176declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) nounwind readnone
13177
; MSan: unmasked variable arithmetic right shift (vpsravd); same shadow scheme
; as the psllv tests — shift the shadow, OR in the collapsed amount shadow.
define <16 x i32> @test_x86_avx512_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psrav_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
; CHECK-NEXT:    store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
  ret <16 x i32> %res
}
13194
; MSan: merge-masked vpsravd — shift-shadow propagation plus select-shadow
; merging of both arms under the uninitialized mask bits.
define <16 x i32> @test_x86_avx512_mask_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psrav_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]]
; CHECK-NEXT:    [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
  ret <16 x i32> %res2
}
13223
; MSan: zero-masked vpsravd — false arm and its shadow are zeroinitializer.
define <16 x i32> @test_x86_avx512_maskz_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psrav_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}
13251
13252declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) nounwind readnone
13253
; MSan: unmasked vpsravq (64-bit lanes); same shadow scheme as vpsravd above.
define <8 x i64> @test_x86_avx512_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psrav_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
; CHECK-NEXT:    store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
  ret <8 x i64> %res
}
13270
; MSan: merge-masked vpsravq — shift-shadow propagation plus select-shadow
; merging with the passthrough operand %a2.
define <8 x i64> @test_x86_avx512_mask_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psrav_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
; CHECK-NEXT:    [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]]
; CHECK-NEXT:    [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
  ret <8 x i64> %res2
}
13299
; MSan: zero-masked vpsravq — false arm and its shadow are zeroinitializer.
define <8 x i64> @test_x86_avx512_maskz_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psrav_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}
13327
13328declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) nounwind readnone
13329
; MSan: unmasked variable logical right shift (vpsrlvd); same shadow scheme as
; the other variable-shift tests in this file.
define <16 x i32> @test_x86_avx512_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psrlv_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
; CHECK-NEXT:    store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  ret <16 x i32> %res
}
13346
; MSan: vpsrlvd with constant operands — all shadows are zeroinitializer, so
; the propagated shadow stays fully initialized.
define <16 x i32> @test_x86_avx512_psrlv_d_512_const() #0 {
; CHECK-LABEL: @test_x86_avx512_psrlv_d_512_const(
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
; CHECK-NEXT:    [[TMP2:%.*]] = or <16 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[RES0:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
; CHECK-NEXT:    [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[RES2:%.*]] = add <16 x i32> [[RES0]], [[RES1]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res0 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
  %res1 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1,  i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,  i32 1, i32 1, i32 1, i32 1, i32 -1  >)
  %res2 = add <16 x i32> %res0, %res1
  ret <16 x i32> %res2
}
13366
; MSan: merge-masked vpsrlvd — shift-shadow propagation plus select-shadow
; merging with the passthrough operand %a2.
define <16 x i32> @test_x86_avx512_mask_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psrlv_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
; CHECK-NEXT:    [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]]
; CHECK-NEXT:    [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
  ret <16 x i32> %res2
}
13395
; MSan: zero-masked vpsrlvd — false arm and its shadow are zeroinitializer.
define <16 x i32> @test_x86_avx512_maskz_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psrlv_d_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
; CHECK-NEXT:    [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i32> [[RES2]]
;
  %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
  %mask.cast = bitcast i16 %mask to <16 x i1>
  %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
  ret <16 x i32> %res2
}
13423
13424declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) nounwind readnone
13425
; MSan: unmasked vpsrlvq (64-bit lanes); same shadow scheme as vpsrlvd above.
define <8 x i64> @test_x86_avx512_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx512_psrlv_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
; CHECK-NEXT:    store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  ret <8 x i64> %res
}
13442
; MSan: vpsrlvq with constant operands — all shadows are zeroinitializer, so
; the propagated shadow stays fully initialized.
define <8 x i64> @test_x86_avx512_psrlv_q_512_const() #0 {
; CHECK-LABEL: @test_x86_avx512_psrlv_q_512_const(
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
; CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[RES0:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
; CHECK-NEXT:    [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[RES1:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[RES2:%.*]] = add <8 x i64> [[RES0]], [[RES1]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res0 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1,i64 2, i64 0, i64 34, i64 -2>)
  %res1 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1,  i64 1, i64 1, i64 1, i64 -1>)
  %res2 = add <8 x i64> %res0, %res1
  ret <8 x i64> %res2
}
13462
; Merge-masked VPSRLVQ. The CHECK lines show MSan's select handling: the result
; shadow is a select on the concrete mask between the shifted shadow of %a0 and
; the shadow of the passthrough %a2, further widened by the mask's own shadow
; (TMP9 path) so an uninitialized mask bit poisons the corresponding lane.
define <8 x i64> @test_x86_avx512_mask_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_mask_psrlv_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
; CHECK-NEXT:    [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]]
; CHECK-NEXT:    [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]]
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
  ret <8 x i64> %res2
}
13491
; Zero-masked VPSRLVQ: same shadow-select pattern as the merge-masked variant
; above, but the passthrough is zeroinitializer, so masked-off lanes carry a
; fully-initialized (zero) shadow.
define <8 x i64> @test_x86_avx512_maskz_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 {
; CHECK-LABEL: @test_x86_avx512_maskz_psrlv_q_512(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT:    [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]]
; CHECK-NEXT:    [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
; CHECK-NEXT:    [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
; CHECK-NEXT:    store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES2]]
;
  %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
  %mask.cast = bitcast i8 %mask to <8 x i1>
  %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
  ret <8 x i64> %res2
}
13519
13520declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) nounwind readnone
13521
13522
; Widen <2 x double> to <8 x double> via a shuffle whose second operand is
; frozen poison; freeze yields a defined value, so its shadow lanes come from
; zeroinitializer in the propagated shuffle.
define <8 x double> @test_mm256_castpd128_pd256_freeze(<2 x double> %a0) nounwind #0 {
; CHECK-LABEL: @test_mm256_castpd128_pd256_freeze(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[A1:%.*]] = freeze <2 x double> poison
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:    store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES]]
;
  %a1 = freeze <2 x double> poison
  %res = shufflevector <2 x double> %a0, <2 x double> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  ret <8 x double> %res
}
13537
13538
; Widen <4 x double> to <8 x double> with a frozen-poison upper half; the
; shadow shuffle pairs the parameter shadow with zeroinitializer.
define <8 x double> @test_mm256_castpd256_pd256_freeze(<4 x double> %a0) nounwind #0 {
; CHECK-LABEL: @test_mm256_castpd256_pd256_freeze(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[A1:%.*]] = freeze <4 x double> poison
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x double> [[RES]]
;
  %a1 = freeze <4 x double> poison
  %res = shufflevector <4 x double> %a0, <4 x double> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x double> %res
}
13553
13554
; Widen <4 x float> to <16 x float> with a frozen-poison second operand; the
; frozen half contributes zero (initialized) shadow in the propagated shuffle.
define <16 x float> @test_mm256_castps128_ps512_freeze(<4 x float> %a0) nounwind #0 {
; CHECK-LABEL: @test_mm256_castps128_ps512_freeze(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[A1:%.*]] = freeze <4 x float> poison
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES]]
;
  %a1 = freeze <4 x float> poison
  ; Normalized "<16 x i32>" mask-type spelling (was "<16x i32>"; accepted by
  ; the lexer but inconsistent with the rest of the file).
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <16 x float> %res
}
13569
13570
; Widen <8 x float> to <16 x float> with a frozen-poison upper half; the shadow
; shuffle pairs the parameter shadow with zeroinitializer.
define <16 x float> @test_mm256_castps256_ps512_freeze(<8 x float> %a0) nounwind #0 {
; CHECK-LABEL: @test_mm256_castps256_ps512_freeze(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[A1:%.*]] = freeze <8 x float> poison
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[RES]]
;
  %a1 = freeze <8 x float> poison
  ; Normalized "<16 x i32>" mask-type spelling (was "<16x i32>"; accepted by
  ; the lexer but inconsistent with the rest of the file).
  %res = shufflevector <8 x float> %a0, <8 x float> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x float> %res
}
13585
13586
; Integer variant of the 128->512 widening cast: frozen poison provides a
; defined second operand, so its shadow is zeroinitializer in the shuffle.
define <8 x i64> @test_mm512_castsi128_si512_freeze(<2 x i64> %a0) nounwind #0 {
; CHECK-LABEL: @test_mm512_castsi128_si512_freeze(
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[A1:%.*]] = freeze <2 x i64> poison
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <2 x i64> [[A0:%.*]], <2 x i64> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT:    store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES]]
;
  %a1 = freeze <2 x i64> poison
  %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  ret <8 x i64> %res
}
13601
13602
; Integer variant of the 256->512 widening cast with a frozen-poison upper
; half; shadow shuffle pairs the parameter shadow with zeroinitializer.
define <8 x i64> @test_mm512_castsi256_si512_pd256_freeze(<4 x i64> %a0) nounwind #0 {
; CHECK-LABEL: @test_mm512_castsi256_si512_pd256_freeze(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[A1:%.*]] = freeze <4 x i64> poison
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[RES:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i64> [[RES]]
;
  %a1 = freeze <4 x i64> poison
  %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i64> %res
}
13617
13618
; Two VCMPPD results (8-bit masks) are zero-extended to 16 bits, truncated back
; to their low 8 lanes via shuffles, and concatenated into a 16-lane select
; mask. The CHECK lines verify that MSan shuffles/concatenates the mask shadows
; (TMP8/TMP9) in lock-step with the values, and that the intrinsic's vector
; operands are checked for initialization before each cmp call.
define <16 x float> @bad_mask_transition(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f, <16 x i1> %extra_param, <16 x i1> %extra_param2) #0 {
; CHECK-LABEL: @bad_mask_transition(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT:    [[TMP8:%.*]] = load <16 x i1>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 384) to ptr), align 8
; CHECK-NEXT:    [[TMP9:%.*]] = load <16 x i1>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 392) to ptr), align 8
; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 320) to ptr), align 8
; CHECK-NEXT:    [[TMP5:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 256) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i64> [[TMP0]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[TMP10:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4)
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i1> [[TMP10]] to i8
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i512 [[TMP12]], 0
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP13]], 0
; CHECK-NEXT:    [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
; CHECK-NEXT:    br i1 [[_MSOR4]], label [[TMP30:%.*]], label [[TMP34:%.*]], !prof [[PROF1]]
; CHECK:       16:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       17:
; CHECK-NEXT:    [[TMP16:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[C:%.*]], <8 x double> [[D:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4)
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x i1> [[TMP16]] to i8
; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP11]] to i16
; CHECK-NEXT:    [[CONV2:%.*]] = zext i8 [[TMP17]] to i16
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i16 [[CONV]] to <16 x i1>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i16 [[CONV2]] to <16 x i1>
; CHECK-NEXT:    [[TMP20:%.*]] = shufflevector <16 x i1> zeroinitializer, <16 x i1> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP31:%.*]] = shufflevector <16 x i1> [[TMP18]], <16 x i1> [[EXTRA_PARAM:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <16 x i1> zeroinitializer, <16 x i1> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <16 x i1> [[TMP19]], <16 x i1> [[EXTRA_PARAM2:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <8 x i1> [[TMP31]], <8 x i1> [[TMP32]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP33:%.*]] = select <16 x i1> [[TMP23]], <16 x i32> [[TMP4]], <16 x i32> [[TMP5]]
; CHECK-NEXT:    [[TMP24:%.*]] = bitcast <16 x float> [[F:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP25:%.*]] = bitcast <16 x float> [[E:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP26:%.*]] = xor <16 x i32> [[TMP24]], [[TMP25]]
; CHECK-NEXT:    [[TMP27:%.*]] = or <16 x i32> [[TMP26]], [[TMP4]]
; CHECK-NEXT:    [[TMP28:%.*]] = or <16 x i32> [[TMP27]], [[TMP5]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP22]], <16 x i32> [[TMP28]], <16 x i32> [[TMP33]]
; CHECK-NEXT:    [[TMP29:%.*]] = select <16 x i1> [[TMP23]], <16 x float> [[F]], <16 x float> [[E]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP29]]
;
entry:
  %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
  %1 = bitcast <8 x i1> %0 to i8
  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
  %3 = bitcast <8 x i1> %2 to i8
  %conv = zext i8 %1 to i16
  %conv2 = zext i8 %3 to i16
  %4 = bitcast i16 %conv to <16 x i1>
  %5 = bitcast i16 %conv2 to <16 x i1>
  %6 = shufflevector <16 x i1> %4, <16 x i1> %extra_param, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %7 = shufflevector <16 x i1> %5, <16 x i1> %extra_param2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %8 = shufflevector <8 x i1> %6, <8 x i1> %7, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %9 = select <16 x i1> %8, <16 x float> %f, <16 x float> %e
  ret <16 x float> %9
}
13691
; Single VCMPPD mask widened i8 -> i16 and used as a 16-lane select mask.
; Note the shadow select uses a zeroinitializer condition (the cmp result's
; shadow is fully initialized once its operands have been checked), so the
; result shadow comes purely from the selected operands' shadows.
define <16 x float> @bad_mask_transition_2(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f) #0 {
; CHECK-LABEL: @bad_mask_transition_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 320) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 256) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i64> [[TMP0]] to i512
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK:       6:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT:    unreachable
; CHECK:       7:
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4)
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i1> [[TMP8]] to i8
; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP9]] to i16
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[CONV]] to <16 x i1>
; CHECK-NEXT:    [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x float> [[F:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x float> [[E:%.*]] to <16 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = xor <16 x i32> [[TMP12]], [[TMP13]]
; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP2]]
; CHECK-NEXT:    [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP3]]
; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <16 x i1> zeroinitializer, <16 x i32> [[TMP16]], <16 x i32> [[TMP11]]
; CHECK-NEXT:    [[TMP17:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[F]], <16 x float> [[E]]
; CHECK-NEXT:    store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x float> [[TMP17]]
;
entry:
  %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
  %1 = bitcast <8 x i1> %0 to i8
  %conv = zext i8 %1 to i16
  %2 = bitcast i16 %conv to <16 x i1>
  %3 = select <16 x i1> %2, <16 x float> %f, <16 x float> %e
  ret <16 x float> %3
}
13733
13734declare <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double>, <8 x double>, <8 x i1>)
13735declare <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float>, <16 x float>, <16 x i1>)
13736declare <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64>, <8 x i64>, <8 x i1>)
13737declare <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
13738declare <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double>, <8 x double>, <8 x i1>)
13739declare <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float>, <16 x float>, <16 x i1>)
13740declare <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64>, <8 x i64>, <8 x i1>)
13741declare <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
13742
13743attributes #0 = { sanitize_memory }
13744