; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
;
; RUN: opt < %s -passes=msan -S | FileCheck %s
;
; Forked from llvm/test/CodeGen/AArch64/arm64-vshift.ll
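;
; The CHECK lines below record MemorySanitizer's shadow propagation for the
; NEON saturating shift intrinsics: the shadow of the shifted operand is passed
; through the same intrinsic with the concrete shift amounts, while the
; shift-amount operand's shadow is collapsed to an all-zeros-or-all-ones mask
; that is OR'ed into the result shadow.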

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-android9001"

define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @sqshl8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @sqshl4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @sqshl2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @sqshl1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <1 x i64> @sqshl1d_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @sqshl1d_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @sqshl_scalar(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp2 = load i64, ptr %B
  %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 %tmp2)
  ret i64 %tmp3
}

define i64 @sqshl_scalar_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @sqshl_scalar_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[_MSLD]], i64 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @uqshl8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @uqshl4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i32> @uqshl2s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
  ret <2 x i32> %tmp3
}

define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @sqshl16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @sqshl8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sqshl4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @sqshl2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <16 x i8> @uqshl16b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
;
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
  ret <16 x i8> %tmp3
}

define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i16> @uqshl8h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
;
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
  ret <8 x i16> %tmp3
}

define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i32> @uqshl4s(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
  ret <4 x i32> %tmp3
}

define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <2 x i64> @uqshl2d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
  ret <2 x i64> %tmp3
}

define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @uqshl1d(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
  ret <1 x i64> %tmp3
}

define <1 x i64> @uqshl1d_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define <1 x i64> @uqshl1d_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
;
  %tmp1 = load <1 x i64>, ptr %A
  %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
  ret <1 x i64> %tmp3
}

define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @uqshl_scalar(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp2 = load i64, ptr %B
  %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 %tmp2)
  ret i64 %tmp3
}

define i64 @uqshl_scalar_constant(ptr %A) nounwind sanitize_memory {
; CHECK-LABEL: define i64 @uqshl_scalar_constant(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
; CHECK:       2:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       3:
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[_MSLD]], i64 1)
; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %tmp1 = load i64, ptr %A
  %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 1)
  ret i64 %tmp3
}

declare <8 x i8>  @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone


declare <8 x i8>  @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <8 x i8> @srshl8b(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
;
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
  ret <8 x i8> %tmp3
}

define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-LABEL: define <4 x i16> @srshl4h(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       8:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       9:
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
;
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
  ret <4 x i16> %tmp3
}

define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
1026; CHECK-LABEL: define <2 x i32> @srshl2s(
1027; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1028; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1029; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1030; CHECK-NEXT:    call void @llvm.donothing()
1031; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1032; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1033; CHECK:       3:
1034; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1035; CHECK-NEXT:    unreachable
1036; CHECK:       4:
1037; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
1038; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1039; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1040; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1041; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
1042; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1043; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1044; CHECK:       8:
1045; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1046; CHECK-NEXT:    unreachable
1047; CHECK:       9:
1048; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
1049; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1050; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1051; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1052; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
1053; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
1054; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
1055; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
1056; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
1057; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
1058; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
1059; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
1060; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1061; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
1062;
1063  %tmp1 = load <2 x i32>, ptr %A
1064  %tmp2 = load <2 x i32>, ptr %B
1065  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
1066  ret <2 x i32> %tmp3
1067}
1068
1069define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
1070; CHECK-LABEL: define <1 x i64> @srshl1d(
1071; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1072; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1073; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1074; CHECK-NEXT:    call void @llvm.donothing()
1075; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1076; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1077; CHECK:       3:
1078; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1079; CHECK-NEXT:    unreachable
1080; CHECK:       4:
1081; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
1082; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1083; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1084; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1085; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
1086; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1087; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1088; CHECK:       8:
1089; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1090; CHECK-NEXT:    unreachable
1091; CHECK:       9:
1092; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
1093; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1094; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1095; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1096; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
1097; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
1098; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
1099; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
1100; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
1101; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
1102; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
1103; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
1104; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1105; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
1106;
1107  %tmp1 = load <1 x i64>, ptr %A
1108  %tmp2 = load <1 x i64>, ptr %B
1109  %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
1110  ret <1 x i64> %tmp3
1111}
1112
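; With a constant shift amount the shift-amount shadow is zero, so only the first
; operand's shadow (run through the intrinsic) propagates to the result.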
1113define <1 x i64> @srshl1d_constant(ptr %A) nounwind sanitize_memory {
1114; CHECK-LABEL: define <1 x i64> @srshl1d_constant(
1115; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
1116; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1117; CHECK-NEXT:    call void @llvm.donothing()
1118; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1119; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
1120; CHECK:       2:
1121; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1122; CHECK-NEXT:    unreachable
1123; CHECK:       3:
1124; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
1125; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
1126; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
1127; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
1128; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
1129; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
1130; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
1131; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
1132; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1133; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
1134;
1135  %tmp1 = load <1 x i64>, ptr %A
1136  %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
1137  ret <1 x i64> %tmp3
1138}
1139
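; Scalar (i64) variant: the shift-amount shadow is already an i64, so no bitcast is
; needed before building the poisoned-shift-amount mask.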
1140define i64 @srshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
1141; CHECK-LABEL: define i64 @srshl_scalar(
1142; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1143; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1144; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1145; CHECK-NEXT:    call void @llvm.donothing()
1146; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1147; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1148; CHECK:       3:
1149; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1150; CHECK-NEXT:    unreachable
1151; CHECK:       4:
1152; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
1153; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1154; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1155; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1156; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
1157; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1158; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1159; CHECK:       8:
1160; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1161; CHECK-NEXT:    unreachable
1162; CHECK:       9:
1163; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
1164; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1165; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1166; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1167; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
1168; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
1169; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
1170; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
1171; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
1172; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 [[TMP2]])
1173; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
1174; CHECK-NEXT:    ret i64 [[TMP3]]
1175;
1176  %tmp1 = load i64, ptr %A
1177  %tmp2 = load i64, ptr %B
1178  %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 %tmp2)
1179  ret i64 %tmp3
1180}
1181
1182define i64 @srshl_scalar_constant(ptr %A) nounwind sanitize_memory {
1183; CHECK-LABEL: define i64 @srshl_scalar_constant(
1184; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
1185; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1186; CHECK-NEXT:    call void @llvm.donothing()
1187; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1188; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
1189; CHECK:       2:
1190; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1191; CHECK-NEXT:    unreachable
1192; CHECK:       3:
1193; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
1194; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
1195; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
1196; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
1197; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
1198; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[_MSLD]], i64 1)
1199; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
1200; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 1)
1201; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
1202; CHECK-NEXT:    ret i64 [[TMP3]]
1203;
1204  %tmp1 = load i64, ptr %A
1205  %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 1)
1206  ret i64 %tmp3
1207}
1208
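; Unsigned rounding shift left (urshl). Shadow propagation mirrors the srshl tests above.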
1209define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
1210; CHECK-LABEL: define <8 x i8> @urshl8b(
1211; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1212; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1213; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1214; CHECK-NEXT:    call void @llvm.donothing()
1215; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1216; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1217; CHECK:       3:
1218; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1219; CHECK-NEXT:    unreachable
1220; CHECK:       4:
1221; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
1222; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1223; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1224; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1225; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
1226; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1227; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1228; CHECK:       8:
1229; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1230; CHECK-NEXT:    unreachable
1231; CHECK:       9:
1232; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
1233; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1234; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1235; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1236; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
1237; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
1238; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
1239; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
1240; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
1241; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
1242; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
1243; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
1244; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1245; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
1246;
1247  %tmp1 = load <8 x i8>, ptr %A
1248  %tmp2 = load <8 x i8>, ptr %B
1249  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
1250  ret <8 x i8> %tmp3
1251}
1252
1253define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
1254; CHECK-LABEL: define <4 x i16> @urshl4h(
1255; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1256; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1257; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1258; CHECK-NEXT:    call void @llvm.donothing()
1259; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1260; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1261; CHECK:       3:
1262; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1263; CHECK-NEXT:    unreachable
1264; CHECK:       4:
1265; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
1266; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1267; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1268; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1269; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
1270; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1271; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1272; CHECK:       8:
1273; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1274; CHECK-NEXT:    unreachable
1275; CHECK:       9:
1276; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
1277; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1278; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1279; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1280; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
1281; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
1282; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
1283; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
1284; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
1285; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
1286; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
1287; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
1288; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1289; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
1290;
1291  %tmp1 = load <4 x i16>, ptr %A
1292  %tmp2 = load <4 x i16>, ptr %B
1293  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
1294  ret <4 x i16> %tmp3
1295}
1296
1297define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
1298; CHECK-LABEL: define <2 x i32> @urshl2s(
1299; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1300; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1301; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1302; CHECK-NEXT:    call void @llvm.donothing()
1303; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1304; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1305; CHECK:       3:
1306; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1307; CHECK-NEXT:    unreachable
1308; CHECK:       4:
1309; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
1310; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1311; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1312; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1313; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
1314; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1315; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1316; CHECK:       8:
1317; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1318; CHECK-NEXT:    unreachable
1319; CHECK:       9:
1320; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
1321; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1322; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1323; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1324; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
1325; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
1326; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
1327; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
1328; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
1329; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
1330; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
1331; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
1332; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1333; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
1334;
1335  %tmp1 = load <2 x i32>, ptr %A
1336  %tmp2 = load <2 x i32>, ptr %B
1337  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
1338  ret <2 x i32> %tmp3
1339}
1340
1341define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
1342; CHECK-LABEL: define <1 x i64> @urshl1d(
1343; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1344; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1345; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1346; CHECK-NEXT:    call void @llvm.donothing()
1347; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1348; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1349; CHECK:       3:
1350; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1351; CHECK-NEXT:    unreachable
1352; CHECK:       4:
1353; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
1354; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1355; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1356; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1357; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
1358; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1359; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1360; CHECK:       8:
1361; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1362; CHECK-NEXT:    unreachable
1363; CHECK:       9:
1364; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
1365; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1366; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1367; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1368; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
1369; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
1370; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
1371; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
1372; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
1373; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
1374; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
1375; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
1376; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1377; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
1378;
1379  %tmp1 = load <1 x i64>, ptr %A
1380  %tmp2 = load <1 x i64>, ptr %B
1381  %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
1382  ret <1 x i64> %tmp3
1383}
1384
1385define <1 x i64> @urshl1d_constant(ptr %A) nounwind sanitize_memory {
1386; CHECK-LABEL: define <1 x i64> @urshl1d_constant(
1387; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
1388; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1389; CHECK-NEXT:    call void @llvm.donothing()
1390; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1391; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
1392; CHECK:       2:
1393; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1394; CHECK-NEXT:    unreachable
1395; CHECK:       3:
1396; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
1397; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
1398; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
1399; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
1400; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
1401; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
1402; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
1403; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
1404; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1405; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
1406;
1407  %tmp1 = load <1 x i64>, ptr %A
1408  %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
1409  ret <1 x i64> %tmp3
1410}
1411
1412define i64 @urshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
1413; CHECK-LABEL: define i64 @urshl_scalar(
1414; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1415; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1416; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1417; CHECK-NEXT:    call void @llvm.donothing()
1418; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1419; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1420; CHECK:       3:
1421; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1422; CHECK-NEXT:    unreachable
1423; CHECK:       4:
1424; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
1425; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1426; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1427; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1428; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
1429; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1430; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1431; CHECK:       8:
1432; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1433; CHECK-NEXT:    unreachable
1434; CHECK:       9:
1435; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
1436; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1437; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1438; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1439; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
1440; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
1441; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
1442; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
1443; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
1444; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 [[TMP2]])
1445; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
1446; CHECK-NEXT:    ret i64 [[TMP3]]
1447;
1448  %tmp1 = load i64, ptr %A
1449  %tmp2 = load i64, ptr %B
1450  %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 %tmp2)
1451  ret i64 %tmp3
1452}
1453
1454define i64 @urshl_scalar_constant(ptr %A) nounwind sanitize_memory {
1455; CHECK-LABEL: define i64 @urshl_scalar_constant(
1456; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
1457; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1458; CHECK-NEXT:    call void @llvm.donothing()
1459; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1460; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
1461; CHECK:       2:
1462; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1463; CHECK-NEXT:    unreachable
1464; CHECK:       3:
1465; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
1466; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
1467; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
1468; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
1469; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
1470; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[_MSLD]], i64 1)
1471; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
1472; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 1)
1473; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
1474; CHECK-NEXT:    ret i64 [[TMP3]]
1475;
1476  %tmp1 = load i64, ptr %A
1477  %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 1)
1478  ret i64 %tmp3
1479}
1480
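; 128-bit vector variants: the shift-amount shadow is bitcast to i128 and truncated to
; i64 before the poisoned-shift-amount check.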
1481define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
1482; CHECK-LABEL: define <16 x i8> @srshl16b(
1483; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1484; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1485; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1486; CHECK-NEXT:    call void @llvm.donothing()
1487; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1488; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1489; CHECK:       3:
1490; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1491; CHECK-NEXT:    unreachable
1492; CHECK:       4:
1493; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
1494; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1495; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1496; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1497; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
1498; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1499; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1500; CHECK:       8:
1501; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1502; CHECK-NEXT:    unreachable
1503; CHECK:       9:
1504; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
1505; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1506; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1507; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1508; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
1509; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
1510; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
1511; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
1512; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
1513; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
1514; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
1515; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
1516; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1517; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1518; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
1519;
1520  %tmp1 = load <16 x i8>, ptr %A
1521  %tmp2 = load <16 x i8>, ptr %B
1522  %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
1523  ret <16 x i8> %tmp3
1524}
1525
1526define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
1527; CHECK-LABEL: define <8 x i16> @srshl8h(
1528; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1529; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1530; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1531; CHECK-NEXT:    call void @llvm.donothing()
1532; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1533; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1534; CHECK:       3:
1535; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1536; CHECK-NEXT:    unreachable
1537; CHECK:       4:
1538; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
1539; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1540; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1541; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1542; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
1543; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1544; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1545; CHECK:       8:
1546; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1547; CHECK-NEXT:    unreachable
1548; CHECK:       9:
1549; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
1550; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1551; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1552; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1553; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
1554; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
1555; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
1556; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
1557; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
1558; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
1559; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
1560; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
1561; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
1562; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1563; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
1564;
1565  %tmp1 = load <8 x i16>, ptr %A
1566  %tmp2 = load <8 x i16>, ptr %B
1567  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
1568  ret <8 x i16> %tmp3
1569}
1570
1571define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
1572; CHECK-LABEL: define <4 x i32> @srshl4s(
1573; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1574; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1575; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1576; CHECK-NEXT:    call void @llvm.donothing()
1577; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1578; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1579; CHECK:       3:
1580; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1581; CHECK-NEXT:    unreachable
1582; CHECK:       4:
1583; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
1584; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1585; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1586; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1587; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
1588; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1589; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1590; CHECK:       8:
1591; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1592; CHECK-NEXT:    unreachable
1593; CHECK:       9:
1594; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
1595; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1596; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1597; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1598; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
1599; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
1600; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
1601; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
1602; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
1603; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
1604; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
1605; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
1606; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
1607; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1608; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1609;
1610  %tmp1 = load <4 x i32>, ptr %A
1611  %tmp2 = load <4 x i32>, ptr %B
1612  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
1613  ret <4 x i32> %tmp3
1614}
1615
1616define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
1617; CHECK-LABEL: define <2 x i64> @srshl2d(
1618; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1619; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1620; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1621; CHECK-NEXT:    call void @llvm.donothing()
1622; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1623; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1624; CHECK:       3:
1625; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1626; CHECK-NEXT:    unreachable
1627; CHECK:       4:
1628; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
1629; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1630; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1631; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1632; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
1633; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1634; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1635; CHECK:       8:
1636; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1637; CHECK-NEXT:    unreachable
1638; CHECK:       9:
1639; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
1640; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1641; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1642; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1643; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
1644; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
1645; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
1646; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
1647; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
1648; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
1649; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
1650; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
1651; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
1652; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1653; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
1654;
1655  %tmp1 = load <2 x i64>, ptr %A
1656  %tmp2 = load <2 x i64>, ptr %B
1657  %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
1658  ret <2 x i64> %tmp3
1659}
1660
1661define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
1662; CHECK-LABEL: define <16 x i8> @urshl16b(
1663; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1664; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1665; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1666; CHECK-NEXT:    call void @llvm.donothing()
1667; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1668; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1669; CHECK:       3:
1670; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1671; CHECK-NEXT:    unreachable
1672; CHECK:       4:
1673; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
1674; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1675; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1676; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1677; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
1678; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1679; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1680; CHECK:       8:
1681; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1682; CHECK-NEXT:    unreachable
1683; CHECK:       9:
1684; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
1685; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1686; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1687; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1688; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
1689; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
1690; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
1691; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
1692; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
1693; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
1694; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
1695; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
1696; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
1697; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1698; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
1699;
1700  %tmp1 = load <16 x i8>, ptr %A
1701  %tmp2 = load <16 x i8>, ptr %B
1702  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
1703  ret <16 x i8> %tmp3
1704}
1705
1706define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
1707; CHECK-LABEL: define <8 x i16> @urshl8h(
1708; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1709; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1710; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1711; CHECK-NEXT:    call void @llvm.donothing()
1712; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1713; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1714; CHECK:       3:
1715; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1716; CHECK-NEXT:    unreachable
1717; CHECK:       4:
1718; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
1719; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1720; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1721; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1722; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
1723; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1724; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1725; CHECK:       8:
1726; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1727; CHECK-NEXT:    unreachable
1728; CHECK:       9:
1729; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
1730; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1731; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1732; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1733; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
1734; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
1735; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
1736; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
1737; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
1738; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
1739; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
1740; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
1741; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
1742; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1743; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
1744;
1745  %tmp1 = load <8 x i16>, ptr %A
1746  %tmp2 = load <8 x i16>, ptr %B
1747  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
1748  ret <8 x i16> %tmp3
1749}
1750
1751define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
1752; CHECK-LABEL: define <4 x i32> @urshl4s(
1753; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1754; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1755; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1756; CHECK-NEXT:    call void @llvm.donothing()
1757; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1758; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1759; CHECK:       3:
1760; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1761; CHECK-NEXT:    unreachable
1762; CHECK:       4:
1763; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
1764; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1765; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1766; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1767; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
1768; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1769; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1770; CHECK:       8:
1771; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1772; CHECK-NEXT:    unreachable
1773; CHECK:       9:
1774; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
1775; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1776; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1777; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1778; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
1779; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
1780; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
1781; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
1782; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
1783; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
1784; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
1785; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
1786; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
1787; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1788; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1789;
1790  %tmp1 = load <4 x i32>, ptr %A
1791  %tmp2 = load <4 x i32>, ptr %B
1792  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
1793  ret <4 x i32> %tmp3
1794}
1795
1796define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
1797; CHECK-LABEL: define <2 x i64> @urshl2d(
1798; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1799; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1800; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1801; CHECK-NEXT:    call void @llvm.donothing()
1802; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1803; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1804; CHECK:       3:
1805; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1806; CHECK-NEXT:    unreachable
1807; CHECK:       4:
1808; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
1809; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1810; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1811; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1812; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
1813; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1814; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1815; CHECK:       8:
1816; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1817; CHECK-NEXT:    unreachable
1818; CHECK:       9:
1819; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
1820; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1821; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1822; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1823; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
1824; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
1825; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
1826; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
1827; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
1828; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
1829; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
1830; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
1831; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
1832; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1833; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
1834;
1835  %tmp1 = load <2 x i64>, ptr %A
1836  %tmp2 = load <2 x i64>, ptr %B
1837  %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
1838  ret <2 x i64> %tmp3
1839}
1840
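; Declarations for the rounding shift intrinsics exercised above.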
1841declare <8 x i8>  @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
1842declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
1843declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
1844declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
1845declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone
1846
1847declare <8 x i8>  @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
1848declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
1849declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
1850declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
1851declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone
1852
1853declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
1854declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
1855declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
1856declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
1857
1858declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
1859declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
1860declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
1861declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
1862
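; Signed saturating rounding shift left (sqrshl). Instrumentation follows the same
; pattern as the srshl/urshl tests above.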
1863define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
1864; CHECK-LABEL: define <8 x i8> @sqrshl8b(
1865; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1866; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1867; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1868; CHECK-NEXT:    call void @llvm.donothing()
1869; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1870; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1871; CHECK:       3:
1872; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1873; CHECK-NEXT:    unreachable
1874; CHECK:       4:
1875; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
1876; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1877; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1878; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1879; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
1880; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1881; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1882; CHECK:       8:
1883; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1884; CHECK-NEXT:    unreachable
1885; CHECK:       9:
1886; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
1887; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1888; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1889; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1890; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
1891; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
1892; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
1893; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
1894; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
1895; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
1896; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
1897; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
1898; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1899; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
1900;
1901  %tmp1 = load <8 x i8>, ptr %A
1902  %tmp2 = load <8 x i8>, ptr %B
1903  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
1904  ret <8 x i8> %tmp3
1905}
1906
1907define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
1908; CHECK-LABEL: define <4 x i16> @sqrshl4h(
1909; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1910; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1911; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1912; CHECK-NEXT:    call void @llvm.donothing()
1913; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1914; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1915; CHECK:       3:
1916; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1917; CHECK-NEXT:    unreachable
1918; CHECK:       4:
1919; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
1920; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1921; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1922; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1923; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
1924; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1925; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1926; CHECK:       8:
1927; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1928; CHECK-NEXT:    unreachable
1929; CHECK:       9:
1930; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
1931; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1932; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1933; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1934; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
1935; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
1936; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
1937; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
1938; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
1939; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
1940; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
1941; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
1942; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1943; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
1944;
1945  %tmp1 = load <4 x i16>, ptr %A
1946  %tmp2 = load <4 x i16>, ptr %B
1947  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
1948  ret <4 x i16> %tmp3
1949}
1950
1951define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
1952; CHECK-LABEL: define <2 x i32> @sqrshl2s(
1953; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1954; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1955; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1956; CHECK-NEXT:    call void @llvm.donothing()
1957; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1958; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
1959; CHECK:       3:
1960; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1961; CHECK-NEXT:    unreachable
1962; CHECK:       4:
1963; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
1964; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
1965; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
1966; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
1967; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
1968; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
1969; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1970; CHECK:       8:
1971; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
1972; CHECK-NEXT:    unreachable
1973; CHECK:       9:
1974; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
1975; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
1976; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
1977; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
1978; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
1979; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
1980; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
1981; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
1982; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
1983; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
1984; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
1985; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
1986; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1987; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
1988;
1989  %tmp1 = load <2 x i32>, ptr %A
1990  %tmp2 = load <2 x i32>, ptr %B
1991  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
1992  ret <2 x i32> %tmp3
1993}
1994
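; Unsigned saturating rounding shift (uqrshl) on 64-bit vectors; the shadow
; propagation pattern matches the sqrshl tests above.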
1995define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
1996; CHECK-LABEL: define <8 x i8> @uqrshl8b(
1997; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
1998; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1999; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2000; CHECK-NEXT:    call void @llvm.donothing()
2001; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2002; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2003; CHECK:       3:
2004; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2005; CHECK-NEXT:    unreachable
2006; CHECK:       4:
2007; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
2008; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2009; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2010; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2011; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
2012; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2013; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2014; CHECK:       8:
2015; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2016; CHECK-NEXT:    unreachable
2017; CHECK:       9:
2018; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
2019; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2020; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2021; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2022; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
2023; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
2024; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
2025; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
2026; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
2027; CHECK-NEXT:    [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
2028; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
2029; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
2030; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2031; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
2032;
2033  %tmp1 = load <8 x i8>, ptr %A
2034  %tmp2 = load <8 x i8>, ptr %B
2035  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
2036  ret <8 x i8> %tmp3
2037}
2038
2039define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
2040; CHECK-LABEL: define <4 x i16> @uqrshl4h(
2041; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2042; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2043; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2044; CHECK-NEXT:    call void @llvm.donothing()
2045; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2046; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2047; CHECK:       3:
2048; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2049; CHECK-NEXT:    unreachable
2050; CHECK:       4:
2051; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
2052; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2053; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2054; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2055; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
2056; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2057; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2058; CHECK:       8:
2059; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2060; CHECK-NEXT:    unreachable
2061; CHECK:       9:
2062; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
2063; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2064; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2065; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2066; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
2067; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
2068; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
2069; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
2070; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
2071; CHECK-NEXT:    [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
2072; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
2073; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
2074; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2075; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
2076;
2077  %tmp1 = load <4 x i16>, ptr %A
2078  %tmp2 = load <4 x i16>, ptr %B
2079  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
2080  ret <4 x i16> %tmp3
2081}
2082
2083define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
2084; CHECK-LABEL: define <2 x i32> @uqrshl2s(
2085; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2086; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2087; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2088; CHECK-NEXT:    call void @llvm.donothing()
2089; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2090; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2091; CHECK:       3:
2092; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2093; CHECK-NEXT:    unreachable
2094; CHECK:       4:
2095; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
2096; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2097; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2098; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2099; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
2100; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2101; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2102; CHECK:       8:
2103; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2104; CHECK-NEXT:    unreachable
2105; CHECK:       9:
2106; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
2107; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2108; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2109; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2110; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
2111; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
2112; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
2113; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
2114; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
2115; CHECK-NEXT:    [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
2116; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
2117; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
2118; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2119; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
2120;
2121  %tmp1 = load <2 x i32>, ptr %A
2122  %tmp2 = load <2 x i32>, ptr %B
2123  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
2124  ret <2 x i32> %tmp3
2125}
2126
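; 128-bit sqrshl variants. The shift-amount shadow is bitcast to i128 and
; truncated to i64 before the is-initialized check, so only the low 64 bits feed
; the icmp; the sext'ed result is then bitcast back over the whole vector.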
2127define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
2128; CHECK-LABEL: define <16 x i8> @sqrshl16b(
2129; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2130; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2131; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2132; CHECK-NEXT:    call void @llvm.donothing()
2133; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2134; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2135; CHECK:       3:
2136; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2137; CHECK-NEXT:    unreachable
2138; CHECK:       4:
2139; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
2140; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2141; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2142; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2143; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
2144; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2145; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2146; CHECK:       8:
2147; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2148; CHECK-NEXT:    unreachable
2149; CHECK:       9:
2150; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
2151; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2152; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2153; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2154; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
2155; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
2156; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
2157; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
2158; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
2159; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
2160; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
2161; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
2162; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
2163; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2164; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
2165;
2166  %tmp1 = load <16 x i8>, ptr %A
2167  %tmp2 = load <16 x i8>, ptr %B
2168  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
2169  ret <16 x i8> %tmp3
2170}
2171
2172define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
2173; CHECK-LABEL: define <8 x i16> @sqrshl8h(
2174; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2175; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2176; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2177; CHECK-NEXT:    call void @llvm.donothing()
2178; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2179; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2180; CHECK:       3:
2181; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2182; CHECK-NEXT:    unreachable
2183; CHECK:       4:
2184; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
2185; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2186; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2187; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2188; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
2189; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2190; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2191; CHECK:       8:
2192; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2193; CHECK-NEXT:    unreachable
2194; CHECK:       9:
2195; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
2196; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2197; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2198; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2199; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
2200; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
2201; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
2202; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
2203; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
2204; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
2205; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
2206; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
2207; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
2208; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2209; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
2210;
2211  %tmp1 = load <8 x i16>, ptr %A
2212  %tmp2 = load <8 x i16>, ptr %B
2213  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
2214  ret <8 x i16> %tmp3
2215}
2216
2217define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
2218; CHECK-LABEL: define <4 x i32> @sqrshl4s(
2219; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2220; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2221; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2222; CHECK-NEXT:    call void @llvm.donothing()
2223; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2224; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2225; CHECK:       3:
2226; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2227; CHECK-NEXT:    unreachable
2228; CHECK:       4:
2229; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
2230; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2231; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2232; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2233; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
2234; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2235; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2236; CHECK:       8:
2237; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2238; CHECK-NEXT:    unreachable
2239; CHECK:       9:
2240; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
2241; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2242; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2243; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2244; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
2245; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
2246; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
2247; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
2248; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
2249; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
2250; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
2251; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
2252; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
2253; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2254; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2255;
2256  %tmp1 = load <4 x i32>, ptr %A
2257  %tmp2 = load <4 x i32>, ptr %B
2258  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
2259  ret <4 x i32> %tmp3
2260}
2261
2262define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
2263; CHECK-LABEL: define <2 x i64> @sqrshl2d(
2264; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2265; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2266; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2267; CHECK-NEXT:    call void @llvm.donothing()
2268; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2269; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2270; CHECK:       3:
2271; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2272; CHECK-NEXT:    unreachable
2273; CHECK:       4:
2274; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
2275; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2276; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2277; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2278; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
2279; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2280; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2281; CHECK:       8:
2282; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2283; CHECK-NEXT:    unreachable
2284; CHECK:       9:
2285; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
2286; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2287; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2288; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2289; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
2290; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
2291; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
2292; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
2293; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
2294; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
2295; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
2296; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
2297; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
2298; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2299; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
2300;
2301  %tmp1 = load <2 x i64>, ptr %A
2302  %tmp2 = load <2 x i64>, ptr %B
2303  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
2304  ret <2 x i64> %tmp3
2305}
2306
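; <1 x i64> sqrshl: the shift-amount shadow already fits in i64, so it is
; collapsed with a plain icmp/sext and no truncation.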
2307define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
2308; CHECK-LABEL: define <1 x i64> @sqrshl1d(
2309; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2310; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2311; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2312; CHECK-NEXT:    call void @llvm.donothing()
2313; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2314; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2315; CHECK:       3:
2316; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2317; CHECK-NEXT:    unreachable
2318; CHECK:       4:
2319; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
2320; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2321; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2322; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2323; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
2324; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2325; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2326; CHECK:       8:
2327; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2328; CHECK-NEXT:    unreachable
2329; CHECK:       9:
2330; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
2331; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2332; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2333; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2334; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
2335; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
2336; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
2337; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
2338; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
2339; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
2340; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
2341; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
2342; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2343; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
2344;
2345  %tmp1 = load <1 x i64>, ptr %A
2346  %tmp2 = load <1 x i64>, ptr %B
2347  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
2348  ret <1 x i64> %tmp3
2349}
2350
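; Constant splat shift amount: its shadow is fully clean, so the result shadow
; is just the intrinsic applied to the first operand's shadow (or'ed with
; zeroinitializer).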
2351define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind sanitize_memory {
2352; CHECK-LABEL: define <1 x i64> @sqrshl1d_constant(
2353; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2354; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2355; CHECK-NEXT:    call void @llvm.donothing()
2356; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2357; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2358; CHECK:       2:
2359; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2360; CHECK-NEXT:    unreachable
2361; CHECK:       3:
2362; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
2363; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2364; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2365; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2366; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
2367; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
2368; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
2369; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
2370; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2371; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
2372;
2373  %tmp1 = load <1 x i64>, ptr %A
2374  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
2375  ret <1 x i64> %tmp3
2376}
2377
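; Scalar i64 sqrshl: the shift-amount shadow is used directly, with no bitcasts
; around the icmp/sext collapse.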
2378define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
2379; CHECK-LABEL: define i64 @sqrshl_scalar(
2380; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2381; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2382; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2383; CHECK-NEXT:    call void @llvm.donothing()
2384; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2385; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2386; CHECK:       3:
2387; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2388; CHECK-NEXT:    unreachable
2389; CHECK:       4:
2390; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
2391; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2392; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2393; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2394; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
2395; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2396; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2397; CHECK:       8:
2398; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2399; CHECK-NEXT:    unreachable
2400; CHECK:       9:
2401; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
2402; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2403; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2404; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2405; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
2406; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
2407; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
2408; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
2409; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
2410; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[TMP1]], i64 [[TMP2]])
2411; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
2412; CHECK-NEXT:    ret i64 [[TMP3]]
2413;
2414  %tmp1 = load i64, ptr %A
2415  %tmp2 = load i64, ptr %B
2416  %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2)
2417  ret i64 %tmp3
2418}
2419
2420define i64 @sqrshl_scalar_constant(ptr %A) nounwind sanitize_memory {
2421; CHECK-LABEL: define i64 @sqrshl_scalar_constant(
2422; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2423; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2424; CHECK-NEXT:    call void @llvm.donothing()
2425; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2426; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2427; CHECK:       2:
2428; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2429; CHECK-NEXT:    unreachable
2430; CHECK:       3:
2431; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
2432; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2433; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2434; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2435; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
2436; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[_MSLD]], i64 1)
2437; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
2438; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[TMP1]], i64 1)
2439; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
2440; CHECK-NEXT:    ret i64 [[TMP3]]
2441;
2442  %tmp1 = load i64, ptr %A
2443  %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1)
2444  ret i64 %tmp3
2445}
2446
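; 128-bit uqrshl variants, mirroring the sqrshl16b/8h/4s/2d tests above.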
2447define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
2448; CHECK-LABEL: define <16 x i8> @uqrshl16b(
2449; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2450; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2451; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2452; CHECK-NEXT:    call void @llvm.donothing()
2453; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2454; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2455; CHECK:       3:
2456; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2457; CHECK-NEXT:    unreachable
2458; CHECK:       4:
2459; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
2460; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2461; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2462; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2463; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
2464; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2465; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2466; CHECK:       8:
2467; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2468; CHECK-NEXT:    unreachable
2469; CHECK:       9:
2470; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
2471; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2472; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2473; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2474; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
2475; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
2476; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
2477; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
2478; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
2479; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
2480; CHECK-NEXT:    [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
2481; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
2482; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
2483; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2484; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
2485;
2486  %tmp1 = load <16 x i8>, ptr %A
2487  %tmp2 = load <16 x i8>, ptr %B
2488  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
2489  ret <16 x i8> %tmp3
2490}
2491
2492define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
2493; CHECK-LABEL: define <8 x i16> @uqrshl8h(
2494; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2495; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2496; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2497; CHECK-NEXT:    call void @llvm.donothing()
2498; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2499; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2500; CHECK:       3:
2501; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2502; CHECK-NEXT:    unreachable
2503; CHECK:       4:
2504; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
2505; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2506; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2507; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2508; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
2509; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2510; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2511; CHECK:       8:
2512; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2513; CHECK-NEXT:    unreachable
2514; CHECK:       9:
2515; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
2516; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2517; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2518; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2519; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
2520; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
2521; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
2522; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
2523; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
2524; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
2525; CHECK-NEXT:    [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
2526; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
2527; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
2528; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2529; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
2530;
2531  %tmp1 = load <8 x i16>, ptr %A
2532  %tmp2 = load <8 x i16>, ptr %B
2533  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
2534  ret <8 x i16> %tmp3
2535}
2536
2537define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
2538; CHECK-LABEL: define <4 x i32> @uqrshl4s(
2539; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2540; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2541; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2542; CHECK-NEXT:    call void @llvm.donothing()
2543; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2544; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2545; CHECK:       3:
2546; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2547; CHECK-NEXT:    unreachable
2548; CHECK:       4:
2549; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
2550; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2551; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2552; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2553; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
2554; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2555; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2556; CHECK:       8:
2557; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2558; CHECK-NEXT:    unreachable
2559; CHECK:       9:
2560; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
2561; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2562; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2563; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2564; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
2565; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
2566; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
2567; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
2568; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
2569; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
2570; CHECK-NEXT:    [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
2571; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
2572; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
2573; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2574; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2575;
2576  %tmp1 = load <4 x i32>, ptr %A
2577  %tmp2 = load <4 x i32>, ptr %B
2578  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
2579  ret <4 x i32> %tmp3
2580}
2581
2582define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
2583; CHECK-LABEL: define <2 x i64> @uqrshl2d(
2584; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2585; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2586; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2587; CHECK-NEXT:    call void @llvm.donothing()
2588; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2589; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2590; CHECK:       3:
2591; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2592; CHECK-NEXT:    unreachable
2593; CHECK:       4:
2594; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
2595; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2596; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2597; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2598; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
2599; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2600; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2601; CHECK:       8:
2602; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2603; CHECK-NEXT:    unreachable
2604; CHECK:       9:
2605; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
2606; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2607; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2608; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2609; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
2610; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
2611; CHECK-NEXT:    [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
2612; CHECK-NEXT:    [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
2613; CHECK-NEXT:    [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
2614; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
2615; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
2616; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
2617; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
2618; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2619; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
2620;
2621  %tmp1 = load <2 x i64>, ptr %A
2622  %tmp2 = load <2 x i64>, ptr %B
2623  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
2624  ret <2 x i64> %tmp3
2625}
2626
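; <1 x i64> uqrshl, with and without a constant shift amount.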
2627define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
2628; CHECK-LABEL: define <1 x i64> @uqrshl1d(
2629; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2630; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2631; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2632; CHECK-NEXT:    call void @llvm.donothing()
2633; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2634; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2635; CHECK:       3:
2636; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2637; CHECK-NEXT:    unreachable
2638; CHECK:       4:
2639; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
2640; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2641; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2642; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2643; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
2644; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2645; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2646; CHECK:       8:
2647; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2648; CHECK-NEXT:    unreachable
2649; CHECK:       9:
2650; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
2651; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2652; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2653; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2654; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
2655; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
2656; CHECK-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
2657; CHECK-NEXT:    [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
2658; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
2659; CHECK-NEXT:    [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
2660; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
2661; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
2662; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2663; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
2664;
2665  %tmp1 = load <1 x i64>, ptr %A
2666  %tmp2 = load <1 x i64>, ptr %B
2667  %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
2668  ret <1 x i64> %tmp3
2669}
2670
2671define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind sanitize_memory {
2672; CHECK-LABEL: define <1 x i64> @uqrshl1d_constant(
2673; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2674; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2675; CHECK-NEXT:    call void @llvm.donothing()
2676; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2677; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2678; CHECK:       2:
2679; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2680; CHECK-NEXT:    unreachable
2681; CHECK:       3:
2682; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
2683; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2684; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2685; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2686; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
2687; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
2688; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
2689; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
2690; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2691; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
2692;
2693  %tmp1 = load <1 x i64>, ptr %A
2694  %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
2695  ret <1 x i64> %tmp3
2696}
2697
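; Scalar i64 uqrshl, with and without a constant shift amount.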
2698define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
2699; CHECK-LABEL: define i64 @uqrshl_scalar(
2700; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
2701; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2702; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2703; CHECK-NEXT:    call void @llvm.donothing()
2704; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2705; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
2706; CHECK:       3:
2707; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2708; CHECK-NEXT:    unreachable
2709; CHECK:       4:
2710; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
2711; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
2712; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
2713; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
2714; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
2715; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2716; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2717; CHECK:       8:
2718; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2719; CHECK-NEXT:    unreachable
2720; CHECK:       9:
2721; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B]], align 8
2722; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
2723; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
2724; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
2725; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
2726; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
2727; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
2728; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
2729; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
2730; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[TMP1]], i64 [[TMP2]])
2731; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
2732; CHECK-NEXT:    ret i64 [[TMP3]]
2733;
2734  %tmp1 = load i64, ptr %A
2735  %tmp2 = load i64, ptr %B
2736  %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 %tmp2)
2737  ret i64 %tmp3
2738}
2739
2740define i64 @uqrshl_scalar_constant(ptr %A) nounwind sanitize_memory {
2741; CHECK-LABEL: define i64 @uqrshl_scalar_constant(
2742; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2743; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2744; CHECK-NEXT:    call void @llvm.donothing()
2745; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2746; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2747; CHECK:       2:
2748; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2749; CHECK-NEXT:    unreachable
2750; CHECK:       3:
2751; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
2752; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2753; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2754; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2755; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
2756; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[_MSLD]], i64 1)
2757; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
2758; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[TMP1]], i64 1)
2759; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
2760; CHECK-NEXT:    ret i64 [[TMP3]]
2761;
2762  %tmp1 = load i64, ptr %A
2763  %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1)
2764  ret i64 %tmp3
2765}
2766
2767declare <8 x i8>  @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
2768declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
2769declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
2770declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
2771declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone
2772
2773declare <8 x i8>  @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
2774declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
2775declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
2776declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
2777declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone
2778
2779declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
2780declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
2781declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
2782declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
2783
2784declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
2785declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
2786declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
2787declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
2788
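; urshr*: rounding shift right by 1, written as urshl by a splat of -1. The
; shift amount is constant, so only the first operand's shadow is propagated
; through the intrinsic.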
2789define <8 x i8> @urshr8b(ptr %A) nounwind sanitize_memory {
2790; CHECK-LABEL: define <8 x i8> @urshr8b(
2791; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2792; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2793; CHECK-NEXT:    call void @llvm.donothing()
2794; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2795; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2796; CHECK:       2:
2797; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2798; CHECK-NEXT:    unreachable
2799; CHECK:       3:
2800; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
2801; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2802; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2803; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2804; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
2805; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1))
2806; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
2807; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1))
2808; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2809; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
2810;
2811  %tmp1 = load <8 x i8>, ptr %A
2812  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
2813  ret <8 x i8> %tmp3
2814}
2815
2816define <4 x i16> @urshr4h(ptr %A) nounwind sanitize_memory {
2817; CHECK-LABEL: define <4 x i16> @urshr4h(
2818; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2819; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2820; CHECK-NEXT:    call void @llvm.donothing()
2821; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2822; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2823; CHECK:       2:
2824; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2825; CHECK-NEXT:    unreachable
2826; CHECK:       3:
2827; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
2828; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2829; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2830; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2831; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
2832; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1))
2833; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
2834; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1))
2835; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2836; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
2837;
2838  %tmp1 = load <4 x i16>, ptr %A
2839  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
2840  ret <4 x i16> %tmp3
2841}
2842
2843define <2 x i32> @urshr2s(ptr %A) nounwind sanitize_memory {
2844; CHECK-LABEL: define <2 x i32> @urshr2s(
2845; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2846; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2847; CHECK-NEXT:    call void @llvm.donothing()
2848; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2849; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2850; CHECK:       2:
2851; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2852; CHECK-NEXT:    unreachable
2853; CHECK:       3:
2854; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
2855; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2856; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2857; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2858; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
2859; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1))
2860; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
2861; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1))
2862; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2863; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
2864;
2865  %tmp1 = load <2 x i32>, ptr %A
2866  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
2867  ret <2 x i32> %tmp3
2868}
2869
2870define <16 x i8> @urshr16b(ptr %A) nounwind sanitize_memory {
2871; CHECK-LABEL: define <16 x i8> @urshr16b(
2872; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2873; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2874; CHECK-NEXT:    call void @llvm.donothing()
2875; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2876; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2877; CHECK:       2:
2878; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2879; CHECK-NEXT:    unreachable
2880; CHECK:       3:
2881; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
2882; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2883; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2884; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2885; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
2886; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1))
2887; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
2888; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1))
2889; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2890; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
2891;
2892  %tmp1 = load <16 x i8>, ptr %A
2893  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
2894  ret <16 x i8> %tmp3
2895}
2896
2897define <8 x i16> @urshr8h(ptr %A) nounwind sanitize_memory {
2898; CHECK-LABEL: define <8 x i16> @urshr8h(
2899; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2900; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2901; CHECK-NEXT:    call void @llvm.donothing()
2902; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2903; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2904; CHECK:       2:
2905; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2906; CHECK-NEXT:    unreachable
2907; CHECK:       3:
2908; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
2909; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2910; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2911; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2912; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
2913; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1))
2914; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
2915; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1))
2916; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2917; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
2918;
2919  %tmp1 = load <8 x i16>, ptr %A
2920  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
2921  ret <8 x i16> %tmp3
2922}
2923
2924define <4 x i32> @urshr4s(ptr %A) nounwind sanitize_memory {
2925; CHECK-LABEL: define <4 x i32> @urshr4s(
2926; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2927; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2928; CHECK-NEXT:    call void @llvm.donothing()
2929; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2930; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2931; CHECK:       2:
2932; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2933; CHECK-NEXT:    unreachable
2934; CHECK:       3:
2935; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
2936; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2937; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2938; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2939; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
2940; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1))
2941; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
2942; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1))
2943; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2944; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
2945;
2946  %tmp1 = load <4 x i32>, ptr %A
2947  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
2948  ret <4 x i32> %tmp3
2949}
2950
2951define <2 x i64> @urshr2d(ptr %A) nounwind sanitize_memory {
2952; CHECK-LABEL: define <2 x i64> @urshr2d(
2953; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2954; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2955; CHECK-NEXT:    call void @llvm.donothing()
2956; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2957; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2958; CHECK:       2:
2959; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2960; CHECK-NEXT:    unreachable
2961; CHECK:       3:
2962; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
2963; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2964; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2965; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2966; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
2967; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1))
2968; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
2969; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1))
2970; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2971; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
2972;
2973  %tmp1 = load <2 x i64>, ptr %A
2974  %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
2975  ret <2 x i64> %tmp3
2976}
2977
2978define <1 x i64> @urshr1d(ptr %A) nounwind sanitize_memory {
2979; CHECK-LABEL: define <1 x i64> @urshr1d(
2980; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
2981; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2982; CHECK-NEXT:    call void @llvm.donothing()
2983; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2984; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
2985; CHECK:       2:
2986; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
2987; CHECK-NEXT:    unreachable
2988; CHECK:       3:
2989; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
2990; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
2991; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
2992; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2993; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
2994; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1))
2995; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
2996; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1))
2997; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
2998; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
2999;
3000  %tmp1 = load <1 x i64>, ptr %A
3001  %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
3002  ret <1 x i64> %tmp3
3003}
3004
3005define i64 @urshr_scalar(ptr %A) nounwind sanitize_memory {
3006; CHECK-LABEL: define i64 @urshr_scalar(
3007; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3008; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3009; CHECK-NEXT:    call void @llvm.donothing()
3010; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3011; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3012; CHECK:       2:
3013; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3014; CHECK-NEXT:    unreachable
3015; CHECK:       3:
3016; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
3017; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3018; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3019; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3020; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
3021; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[_MSLD]], i64 -1)
3022; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
3023; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 -1)
3024; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
3025; CHECK-NEXT:    ret i64 [[TMP3]]
3026;
3027  %tmp1 = load i64, ptr %A
3028  %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
3029  ret i64 %tmp3
3030}
3031
3032define <8 x i8> @srshr8b(ptr %A) nounwind sanitize_memory {
3033; CHECK-LABEL: define <8 x i8> @srshr8b(
3034; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3035; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3036; CHECK-NEXT:    call void @llvm.donothing()
3037; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3038; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3039; CHECK:       2:
3040; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3041; CHECK-NEXT:    unreachable
3042; CHECK:       3:
3043; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
3044; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3045; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3046; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3047; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
3048; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1))
3049; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
3050; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1))
3051; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3052; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
3053;
3054  %tmp1 = load <8 x i8>, ptr %A
3055  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
3056  ret <8 x i8> %tmp3
3057}
3058
3059define <4 x i16> @srshr4h(ptr %A) nounwind sanitize_memory {
3060; CHECK-LABEL: define <4 x i16> @srshr4h(
3061; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3062; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3063; CHECK-NEXT:    call void @llvm.donothing()
3064; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3065; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3066; CHECK:       2:
3067; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3068; CHECK-NEXT:    unreachable
3069; CHECK:       3:
3070; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
3071; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3072; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3073; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3074; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
3075; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1))
3076; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
3077; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1))
3078; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3079; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
3080;
3081  %tmp1 = load <4 x i16>, ptr %A
3082  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
3083  ret <4 x i16> %tmp3
3084}
3085
3086define <2 x i32> @srshr2s(ptr %A) nounwind sanitize_memory {
3087; CHECK-LABEL: define <2 x i32> @srshr2s(
3088; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3089; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3090; CHECK-NEXT:    call void @llvm.donothing()
3091; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3092; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3093; CHECK:       2:
3094; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3095; CHECK-NEXT:    unreachable
3096; CHECK:       3:
3097; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
3098; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3099; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3100; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3101; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
3102; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1))
3103; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
3104; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1))
3105; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3106; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
3107;
3108  %tmp1 = load <2 x i32>, ptr %A
3109  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
3110  ret <2 x i32> %tmp3
3111}
3112
3113define <16 x i8> @srshr16b(ptr %A) nounwind sanitize_memory {
3114; CHECK-LABEL: define <16 x i8> @srshr16b(
3115; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3116; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3117; CHECK-NEXT:    call void @llvm.donothing()
3118; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3119; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3120; CHECK:       2:
3121; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3122; CHECK-NEXT:    unreachable
3123; CHECK:       3:
3124; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
3125; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3126; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3127; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3128; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
3129; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1))
3130; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
3131; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1))
3132; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3133; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
3134;
3135  %tmp1 = load <16 x i8>, ptr %A
3136  %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
3137  ret <16 x i8> %tmp3
3138}
3139
3140define <8 x i16> @srshr8h(ptr %A) nounwind sanitize_memory {
3141; CHECK-LABEL: define <8 x i16> @srshr8h(
3142; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3143; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3144; CHECK-NEXT:    call void @llvm.donothing()
3145; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3146; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3147; CHECK:       2:
3148; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3149; CHECK-NEXT:    unreachable
3150; CHECK:       3:
3151; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
3152; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3153; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3154; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3155; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
3156; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1))
3157; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
3158; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1))
3159; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3160; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
3161;
3162  %tmp1 = load <8 x i16>, ptr %A
3163  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
3164  ret <8 x i16> %tmp3
3165}
3166
3167define <4 x i32> @srshr4s(ptr %A) nounwind sanitize_memory {
3168; CHECK-LABEL: define <4 x i32> @srshr4s(
3169; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3170; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3171; CHECK-NEXT:    call void @llvm.donothing()
3172; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3173; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3174; CHECK:       2:
3175; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3176; CHECK-NEXT:    unreachable
3177; CHECK:       3:
3178; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
3179; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3180; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3181; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3182; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
3183; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1))
3184; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
3185; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1))
3186; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3187; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
3188;
3189  %tmp1 = load <4 x i32>, ptr %A
3190  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
3191  ret <4 x i32> %tmp3
3192}
3193
3194define <2 x i64> @srshr2d(ptr %A) nounwind sanitize_memory {
3195; CHECK-LABEL: define <2 x i64> @srshr2d(
3196; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3197; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3198; CHECK-NEXT:    call void @llvm.donothing()
3199; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3200; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3201; CHECK:       2:
3202; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3203; CHECK-NEXT:    unreachable
3204; CHECK:       3:
3205; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
3206; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3207; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3208; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3209; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
3210; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1))
3211; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
3212; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1))
3213; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3214; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
3215;
3216  %tmp1 = load <2 x i64>, ptr %A
3217  %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
3218  ret <2 x i64> %tmp3
3219}
3220
3221define <1 x i64> @srshr1d(ptr %A) nounwind sanitize_memory {
3222; CHECK-LABEL: define <1 x i64> @srshr1d(
3223; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3224; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3225; CHECK-NEXT:    call void @llvm.donothing()
3226; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3227; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3228; CHECK:       2:
3229; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3230; CHECK-NEXT:    unreachable
3231; CHECK:       3:
3232; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
3233; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3234; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3235; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3236; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
3237; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1))
3238; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
3239; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1))
3240; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3241; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
3242;
3243  %tmp1 = load <1 x i64>, ptr %A
3244  %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
3245  ret <1 x i64> %tmp3
3246}
3247
3248define i64 @srshr_scalar(ptr %A) nounwind sanitize_memory {
3249; CHECK-LABEL: define i64 @srshr_scalar(
3250; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3251; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3252; CHECK-NEXT:    call void @llvm.donothing()
3253; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3254; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3255; CHECK:       2:
3256; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3257; CHECK-NEXT:    unreachable
3258; CHECK:       3:
3259; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
3260; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3261; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3262; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3263; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
3264; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[_MSLD]], i64 -1)
3265; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
3266; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 -1)
3267; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
3268; CHECK-NEXT:    ret i64 [[TMP3]]
3269;
3270  %tmp1 = load i64, ptr %A
3271  %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
3272  ret i64 %tmp3
3273}
3274
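; The sqshlu tests follow the same shape with a constant splat shift of 1: the
; checks expect the operand shadow to be passed through the matching sqshlu
; intrinsic and then or'ed with zeroinitializer.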
3275define <8 x i8> @sqshlu8b(ptr %A) nounwind sanitize_memory {
3276; CHECK-LABEL: define <8 x i8> @sqshlu8b(
3277; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3278; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3279; CHECK-NEXT:    call void @llvm.donothing()
3280; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3281; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3282; CHECK:       2:
3283; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3284; CHECK-NEXT:    unreachable
3285; CHECK:       3:
3286; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
3287; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3288; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3289; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3290; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
3291; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1))
3292; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
3293; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1))
3294; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3295; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
3296;
3297  %tmp1 = load <8 x i8>, ptr %A
3298  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
3299  ret <8 x i8> %tmp3
3300}
3301
3302define <4 x i16> @sqshlu4h(ptr %A) nounwind sanitize_memory {
3303; CHECK-LABEL: define <4 x i16> @sqshlu4h(
3304; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3305; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3306; CHECK-NEXT:    call void @llvm.donothing()
3307; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3308; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3309; CHECK:       2:
3310; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3311; CHECK-NEXT:    unreachable
3312; CHECK:       3:
3313; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
3314; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3315; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3316; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3317; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
3318; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1))
3319; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
3320; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1))
3321; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3322; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
3323;
3324  %tmp1 = load <4 x i16>, ptr %A
3325  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
3326  ret <4 x i16> %tmp3
3327}
3328
3329define <2 x i32> @sqshlu2s(ptr %A) nounwind sanitize_memory {
3330; CHECK-LABEL: define <2 x i32> @sqshlu2s(
3331; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3332; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3333; CHECK-NEXT:    call void @llvm.donothing()
3334; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3335; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3336; CHECK:       2:
3337; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3338; CHECK-NEXT:    unreachable
3339; CHECK:       3:
3340; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
3341; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3342; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3343; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3344; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
3345; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1))
3346; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
3347; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
3348; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3349; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
3350;
3351  %tmp1 = load <2 x i32>, ptr %A
3352  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
3353  ret <2 x i32> %tmp3
3354}
3355
3356define <16 x i8> @sqshlu16b(ptr %A) nounwind sanitize_memory {
3357; CHECK-LABEL: define <16 x i8> @sqshlu16b(
3358; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3359; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3360; CHECK-NEXT:    call void @llvm.donothing()
3361; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3362; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3363; CHECK:       2:
3364; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3365; CHECK-NEXT:    unreachable
3366; CHECK:       3:
3367; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
3368; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3369; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3370; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3371; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
3372; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1))
3373; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
3374; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1))
3375; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3376; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
3377;
3378  %tmp1 = load <16 x i8>, ptr %A
3379  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
3380  ret <16 x i8> %tmp3
3381}
3382
3383define <8 x i16> @sqshlu8h(ptr %A) nounwind sanitize_memory {
3384; CHECK-LABEL: define <8 x i16> @sqshlu8h(
3385; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3386; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3387; CHECK-NEXT:    call void @llvm.donothing()
3388; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3389; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3390; CHECK:       2:
3391; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3392; CHECK-NEXT:    unreachable
3393; CHECK:       3:
3394; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
3395; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3396; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3397; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3398; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
3399; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1))
3400; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
3401; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1))
3402; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3403; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
3404;
3405  %tmp1 = load <8 x i16>, ptr %A
3406  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
3407  ret <8 x i16> %tmp3
3408}
3409
3410define <4 x i32> @sqshlu4s(ptr %A) nounwind sanitize_memory {
3411; CHECK-LABEL: define <4 x i32> @sqshlu4s(
3412; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3413; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3414; CHECK-NEXT:    call void @llvm.donothing()
3415; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3416; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3417; CHECK:       2:
3418; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3419; CHECK-NEXT:    unreachable
3420; CHECK:       3:
3421; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
3422; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3423; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3424; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3425; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
3426; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1))
3427; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
3428; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1))
3429; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3430; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
3431;
3432  %tmp1 = load <4 x i32>, ptr %A
3433  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
3434  ret <4 x i32> %tmp3
3435}
3436
3437define <2 x i64> @sqshlu2d(ptr %A) nounwind sanitize_memory {
3438; CHECK-LABEL: define <2 x i64> @sqshlu2d(
3439; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3440; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3441; CHECK-NEXT:    call void @llvm.donothing()
3442; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3443; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3444; CHECK:       2:
3445; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3446; CHECK-NEXT:    unreachable
3447; CHECK:       3:
3448; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
3449; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3450; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3451; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3452; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
3453; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1))
3454; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
3455; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1))
3456; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3457; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
3458;
3459  %tmp1 = load <2 x i64>, ptr %A
3460  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
3461  ret <2 x i64> %tmp3
3462}
3463
3464define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind sanitize_memory {
3465; CHECK-LABEL: define <1 x i64> @sqshlu1d_constant(
3466; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3467; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3468; CHECK-NEXT:    call void @llvm.donothing()
3469; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3470; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3471; CHECK:       2:
3472; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3473; CHECK-NEXT:    unreachable
3474; CHECK:       3:
3475; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
3476; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3477; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3478; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3479; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
3480; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
3481; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
3482; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
3483; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3484; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
3485;
3486  %tmp1 = load <1 x i64>, ptr %A
3487  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
3488  ret <1 x i64> %tmp3
3489}
3490
3491define i64 @sqshlu_i64_constant(ptr %A) nounwind sanitize_memory {
3492; CHECK-LABEL: define i64 @sqshlu_i64_constant(
3493; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3494; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3495; CHECK-NEXT:    call void @llvm.donothing()
3496; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3497; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3498; CHECK:       2:
3499; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3500; CHECK-NEXT:    unreachable
3501; CHECK:       3:
3502; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
3503; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3504; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3505; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3506; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
3507; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 [[_MSLD]], i64 1)
3508; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
3509; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 [[TMP1]], i64 1)
3510; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
3511; CHECK-NEXT:    ret i64 [[TMP3]]
3512;
3513  %tmp1 = load i64, ptr %A
3514  %tmp3 = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %tmp1, i64 1)
3515  ret i64 %tmp3
3516}
3517
3518define i32 @sqshlu_i32_constant(ptr %A) nounwind sanitize_memory {
3519; CHECK-LABEL: define i32 @sqshlu_i32_constant(
3520; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3521; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3522; CHECK-NEXT:    call void @llvm.donothing()
3523; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3524; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3525; CHECK:       2:
3526; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3527; CHECK-NEXT:    unreachable
3528; CHECK:       3:
3529; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
3530; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3531; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3532; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3533; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
3534; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 [[_MSLD]], i32 1)
3535; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP7]], 0
3536; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 [[TMP1]], i32 1)
3537; CHECK-NEXT:    store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
3538; CHECK-NEXT:    ret i32 [[TMP3]]
3539;
3540  %tmp1 = load i32, ptr %A
3541  %tmp3 = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %tmp1, i32 1)
3542  ret i32 %tmp3
3543}
3544
3545declare <8 x i8>  @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
3546declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
3547declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
3548declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
3549declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone
3550declare i32 @llvm.aarch64.neon.sqshlu.i32(i32, i32) nounwind readnone
3551
3552declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
3553declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
3554declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
3555declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
3556
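; For the narrowing rshrn tests, the checks expect the wide operand shadow to
; be narrowed through the same rshrn intrinsic (with the same immediate shift)
; and then or'ed with zeroinitializer before being stored to the retval shadow.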
3557define <8 x i8> @rshrn8b(ptr %A) nounwind sanitize_memory {
3558; CHECK-LABEL: define <8 x i8> @rshrn8b(
3559; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3560; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3561; CHECK-NEXT:    call void @llvm.donothing()
3562; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3563; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3564; CHECK:       2:
3565; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3566; CHECK-NEXT:    unreachable
3567; CHECK:       3:
3568; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
3569; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3570; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3571; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3572; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
3573; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[_MSLD]], i32 1)
3574; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
3575; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
3576; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
3577; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
3578;
3579  %tmp1 = load <8 x i16>, ptr %A
3580  %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
3581  ret <8 x i8> %tmp3
3582}
3583
3584define <4 x i16> @rshrn4h(ptr %A) nounwind sanitize_memory {
3585; CHECK-LABEL: define <4 x i16> @rshrn4h(
3586; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3587; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3588; CHECK-NEXT:    call void @llvm.donothing()
3589; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3590; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3591; CHECK:       2:
3592; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3593; CHECK-NEXT:    unreachable
3594; CHECK:       3:
3595; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
3596; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3597; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3598; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3599; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
3600; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[_MSLD]], i32 1)
3601; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
3602; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
3603; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
3604; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
3605;
3606  %tmp1 = load <4 x i32>, ptr %A
3607  %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
3608  ret <4 x i16> %tmp3
3609}
3610
3611define <2 x i32> @rshrn2s(ptr %A) nounwind sanitize_memory {
3612; CHECK-LABEL: define <2 x i32> @rshrn2s(
3613; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3614; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3615; CHECK-NEXT:    call void @llvm.donothing()
3616; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3617; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3618; CHECK:       2:
3619; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3620; CHECK-NEXT:    unreachable
3621; CHECK:       3:
3622; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
3623; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3624; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3625; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3626; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
3627; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[_MSLD]], i32 1)
3628; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
3629; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
3630; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
3631; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
3632;
3633  %tmp1 = load <2 x i64>, ptr %A
3634  %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
3635  ret <2 x i32> %tmp3
3636}
3637
3638define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
3639; CHECK-LABEL: define <16 x i8> @rshrn16b(
3640; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
3641; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3642; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3643; CHECK-NEXT:    call void @llvm.donothing()
3644; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3645; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
3646; CHECK:       3:
3647; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3648; CHECK-NEXT:    unreachable
3649; CHECK:       4:
3650; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
3651; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
3652; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
3653; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
3654; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
3655; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
3656; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
3657; CHECK:       8:
3658; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3659; CHECK-NEXT:    unreachable
3660; CHECK:       9:
3661; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
3662; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
3663; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
3664; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
3665; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
3666; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[_MSLD1]], i32 1)
3667; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
3668; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
3669; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3670; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3671; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3672; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
3673;
3674  %out = load <8 x i8>, ptr %ret
3675  %tmp1 = load <8 x i16>, ptr %A
3676  %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
3677  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3678  ret <16 x i8> %tmp4
3679}
3680
3681define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
3682; CHECK-LABEL: define <8 x i16> @rshrn8h(
3683; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
3684; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3685; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3686; CHECK-NEXT:    call void @llvm.donothing()
3687; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3688; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
3689; CHECK:       3:
3690; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3691; CHECK-NEXT:    unreachable
3692; CHECK:       4:
3693; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
3694; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
3695; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
3696; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
3697; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
3698; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
3699; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
3700; CHECK:       8:
3701; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3702; CHECK-NEXT:    unreachable
3703; CHECK:       9:
3704; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
3705; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
3706; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
3707; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
3708; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
3709; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[_MSLD1]], i32 1)
3710; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
3711; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
3712; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3713; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3714; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3715; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
3716;
3717  %out = load <4 x i16>, ptr %ret
3718  %tmp1 = load <4 x i32>, ptr %A
3719  %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
3720  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3721  ret <8 x i16> %tmp4
3722}
3723
3724define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
3725; CHECK-LABEL: define <4 x i32> @rshrn4s(
3726; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
3727; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3728; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3729; CHECK-NEXT:    call void @llvm.donothing()
3730; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3731; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
3732; CHECK:       3:
3733; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3734; CHECK-NEXT:    unreachable
3735; CHECK:       4:
3736; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
3737; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
3738; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
3739; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
3740; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
3741; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
3742; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
3743; CHECK:       8:
3744; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3745; CHECK-NEXT:    unreachable
3746; CHECK:       9:
3747; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
3748; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
3749; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
3750; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
3751; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
3752; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[_MSLD1]], i32 1)
3753; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
3754; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
3755; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3756; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3757; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3758; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
3759;
3760  %out = load <2 x i32>, ptr %ret
3761  %tmp1 = load <2 x i64>, ptr %A
3762  %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
3763  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3764  ret <4 x i32> %tmp4
3765}
3766
3767declare <8 x i8>  @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
3768declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
3769declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
3770
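; shrn is written as plain lshr + trunc IR rather than an intrinsic, so the
; checks expect the shadow to follow the same lshr-by-splat(1) and trunc
; sequence as the values themselves.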
3771define <8 x i8> @shrn8b(ptr %A) nounwind sanitize_memory {
3772; CHECK-LABEL: define <8 x i8> @shrn8b(
3773; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3774; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3775; CHECK-NEXT:    call void @llvm.donothing()
3776; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3777; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3778; CHECK:       2:
3779; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3780; CHECK-NEXT:    unreachable
3781; CHECK:       3:
3782; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
3783; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3784; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3785; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3786; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
3787; CHECK-NEXT:    [[TMP7:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1)
3788; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
3789; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
3790; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP8]] to <8 x i8>
3791; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
3792; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3793; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
3794;
3795  %tmp1 = load <8 x i16>, ptr %A
3796  %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3797  %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
3798  ret <8 x i8> %tmp3
3799}
3800
3801define <4 x i16> @shrn4h(ptr %A) nounwind sanitize_memory {
3802; CHECK-LABEL: define <4 x i16> @shrn4h(
3803; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3804; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3805; CHECK-NEXT:    call void @llvm.donothing()
3806; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3807; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3808; CHECK:       2:
3809; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3810; CHECK-NEXT:    unreachable
3811; CHECK:       3:
3812; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
3813; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3814; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3815; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3816; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
3817; CHECK-NEXT:    [[TMP7:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1)
3818; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
3819; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
3820; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16>
3821; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
3822; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3823; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
3824;
3825  %tmp1 = load <4 x i32>, ptr %A
3826  %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3827  %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
3828  ret <4 x i16> %tmp3
3829}
3830
3831define <2 x i32> @shrn2s(ptr %A) nounwind sanitize_memory {
3832; CHECK-LABEL: define <2 x i32> @shrn2s(
3833; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
3834; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3835; CHECK-NEXT:    call void @llvm.donothing()
3836; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3837; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3838; CHECK:       2:
3839; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3840; CHECK-NEXT:    unreachable
3841; CHECK:       3:
3842; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
3843; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
3844; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
3845; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
3846; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
3847; CHECK-NEXT:    [[TMP7:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1)
3848; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
3849; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
3850; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32>
3851; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
3852; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
3853; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
3854;
3855  %tmp1 = load <2 x i64>, ptr %A
3856  %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
3857  %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
3858  ret <2 x i32> %tmp3
3859}
3860
3861define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
3862; CHECK-LABEL: define <16 x i8> @shrn16b(
3863; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
3864; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3865; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3866; CHECK-NEXT:    call void @llvm.donothing()
3867; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3868; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
3869; CHECK:       3:
3870; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3871; CHECK-NEXT:    unreachable
3872; CHECK:       4:
3873; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
3874; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
3875; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
3876; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
3877; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
3878; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
3879; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
3880; CHECK:       8:
3881; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3882; CHECK-NEXT:    unreachable
3883; CHECK:       9:
3884; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
3885; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
3886; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
3887; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
3888; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
3889; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD1]], splat (i16 1)
3890; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
3891; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
3892; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP14]] to <8 x i8>
3893; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
3894; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSPROP]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3895; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3896; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
3897; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
3898;
3899  %out = load <8 x i8>, ptr %ret
3900  %tmp1 = load <8 x i16>, ptr %A
3901  %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
3902  %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
3903  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
3904  ret <16 x i8> %tmp4
3905}
3906
3907define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
3908; CHECK-LABEL: define <8 x i16> @shrn8h(
3909; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
3910; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3911; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3912; CHECK-NEXT:    call void @llvm.donothing()
3913; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3914; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
3915; CHECK:       3:
3916; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3917; CHECK-NEXT:    unreachable
3918; CHECK:       4:
3919; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
3920; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
3921; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
3922; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
3923; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
3924; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
3925; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
3926; CHECK:       8:
3927; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3928; CHECK-NEXT:    unreachable
3929; CHECK:       9:
3930; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
3931; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
3932; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
3933; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
3934; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
3935; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD1]], splat (i32 1)
3936; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
3937; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
3938; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16>
3939; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
3940; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSPROP]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3941; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3942; CHECK-NEXT:    store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
3943; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
3944;
3945  %out = load <4 x i16>, ptr %ret
3946  %tmp1 = load <4 x i32>, ptr %A
3947  %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
3948  %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
3949  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
3950  ret <8 x i16> %tmp4
3951}
3952
3953define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
3954; CHECK-LABEL: define <4 x i32> @shrn4s(
3955; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
3956; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
3957; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3958; CHECK-NEXT:    call void @llvm.donothing()
3959; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
3960; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
3961; CHECK:       3:
3962; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3963; CHECK-NEXT:    unreachable
3964; CHECK:       4:
3965; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
3966; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
3967; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
3968; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
3969; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
3970; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
3971; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
3972; CHECK:       8:
3973; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
3974; CHECK-NEXT:    unreachable
3975; CHECK:       9:
3976; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
3977; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
3978; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
3979; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
3980; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
3981; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD1]], splat (i64 1)
3982; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
3983; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
3984; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP14]] to <2 x i32>
3985; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
3986; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSPROP]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3987; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3988; CHECK-NEXT:    store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
3989; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
3990;
3991  %out = load <2 x i32>, ptr %ret
3992  %tmp1 = load <2 x i64>, ptr %A
3993  %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
3994  %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
3995  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3996  ret <4 x i32> %tmp4
3997}
3998
3999declare <8 x i8>  @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone
4000declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
4001declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
4002
4003define i32 @sqshrn1s(i64 %A) nounwind sanitize_memory {
4004; CHECK-LABEL: define i32 @sqshrn1s(
4005; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
4006; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4007; CHECK-NEXT:    call void @llvm.donothing()
4008; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 [[TMP1]], i32 1)
4009; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
4010; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 [[A]], i32 1)
4011; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
4012; CHECK-NEXT:    ret i32 [[TMP]]
4013;
4014  %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1)
4015  ret i32 %tmp
4016}
4017
4018define <8 x i8> @sqshrn8b(ptr %A) nounwind sanitize_memory {
4019; CHECK-LABEL: define <8 x i8> @sqshrn8b(
4020; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4021; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4022; CHECK-NEXT:    call void @llvm.donothing()
4023; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4024; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4025; CHECK:       2:
4026; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4027; CHECK-NEXT:    unreachable
4028; CHECK:       3:
4029; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
4030; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4031; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4032; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4033; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
4034; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[_MSLD]], i32 1)
4035; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
4036; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
4037; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
4038; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
4039;
4040  %tmp1 = load <8 x i16>, ptr %A
4041  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
4042  ret <8 x i8> %tmp3
4043}
4044
4045define <4 x i16> @sqshrn4h(ptr %A) nounwind sanitize_memory {
4046; CHECK-LABEL: define <4 x i16> @sqshrn4h(
4047; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4048; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4049; CHECK-NEXT:    call void @llvm.donothing()
4050; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4051; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4052; CHECK:       2:
4053; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4054; CHECK-NEXT:    unreachable
4055; CHECK:       3:
4056; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
4057; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4058; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4059; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4060; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
4061; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[_MSLD]], i32 1)
4062; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
4063; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
4064; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
4065; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
4066;
4067  %tmp1 = load <4 x i32>, ptr %A
4068  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
4069  ret <4 x i16> %tmp3
4070}
4071
4072define <2 x i32> @sqshrn2s(ptr %A) nounwind sanitize_memory {
4073; CHECK-LABEL: define <2 x i32> @sqshrn2s(
4074; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4075; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4076; CHECK-NEXT:    call void @llvm.donothing()
4077; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4078; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4079; CHECK:       2:
4080; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4081; CHECK-NEXT:    unreachable
4082; CHECK:       3:
4083; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
4084; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4085; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4086; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4087; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
4088; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[_MSLD]], i32 1)
4089; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
4090; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
4091; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
4092; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
4093;
4094  %tmp1 = load <2 x i64>, ptr %A
4095  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
4096  ret <2 x i32> %tmp3
4097}
4098
4099
4100define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
4101; CHECK-LABEL: define <16 x i8> @sqshrn16b(
4102; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4103; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4104; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4105; CHECK-NEXT:    call void @llvm.donothing()
4106; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4107; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4108; CHECK:       3:
4109; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4110; CHECK-NEXT:    unreachable
4111; CHECK:       4:
4112; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
4113; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4114; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4115; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4116; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
4117; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4118; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4119; CHECK:       8:
4120; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4121; CHECK-NEXT:    unreachable
4122; CHECK:       9:
4123; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
4124; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4125; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4126; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4127; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
4128; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[_MSLD1]], i32 1)
4129; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
4130; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
4131; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4132; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4133; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4134; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
4135;
4136  %out = load <8 x i8>, ptr %ret
4137  %tmp1 = load <8 x i16>, ptr %A
4138  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
4139  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4140  ret <16 x i8> %tmp4
4141}
4142
4143define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
4144; CHECK-LABEL: define <8 x i16> @sqshrn8h(
4145; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4146; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4147; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4148; CHECK-NEXT:    call void @llvm.donothing()
4149; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4150; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4151; CHECK:       3:
4152; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4153; CHECK-NEXT:    unreachable
4154; CHECK:       4:
4155; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
4156; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4157; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4158; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4159; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
4160; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4161; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4162; CHECK:       8:
4163; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4164; CHECK-NEXT:    unreachable
4165; CHECK:       9:
4166; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
4167; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4168; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4169; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4170; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
4171; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[_MSLD1]], i32 1)
4172; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
4173; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
4174; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4175; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4176; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4177; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
4178;
4179  %out = load <4 x i16>, ptr %ret
4180  %tmp1 = load <4 x i32>, ptr %A
4181  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
4182  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4183  ret <8 x i16> %tmp4
4184}
4185
4186define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
4187; CHECK-LABEL: define <4 x i32> @sqshrn4s(
4188; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4189; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4190; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4191; CHECK-NEXT:    call void @llvm.donothing()
4192; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4193; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4194; CHECK:       3:
4195; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4196; CHECK-NEXT:    unreachable
4197; CHECK:       4:
4198; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
4199; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4200; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4201; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4202; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
4203; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4204; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4205; CHECK:       8:
4206; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4207; CHECK-NEXT:    unreachable
4208; CHECK:       9:
4209; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
4210; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4211; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4212; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4213; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
4214; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[_MSLD1]], i32 1)
4215; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
4216; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
4217; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4218; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4219; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4220; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
4221;
4222  %out = load <2 x i32>, ptr %ret
4223  %tmp1 = load <2 x i64>, ptr %A
4224  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
4225  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4226  ret <4 x i32> %tmp4
4227}
4228
4229declare i32  @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone
4230declare <8 x i8>  @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone
4231declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone
4232declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone
4233
4234define i32 @sqshrun1s(i64 %A) nounwind sanitize_memory {
4235; CHECK-LABEL: define i32 @sqshrun1s(
4236; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
4237; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4238; CHECK-NEXT:    call void @llvm.donothing()
4239; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 [[TMP1]], i32 1)
4240; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
4241; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 [[A]], i32 1)
4242; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
4243; CHECK-NEXT:    ret i32 [[TMP]]
4244;
4245  %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1)
4246  ret i32 %tmp
4247}
4248
4249define <8 x i8> @sqshrun8b(ptr %A) nounwind sanitize_memory {
4250; CHECK-LABEL: define <8 x i8> @sqshrun8b(
4251; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4252; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4253; CHECK-NEXT:    call void @llvm.donothing()
4254; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4255; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4256; CHECK:       2:
4257; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4258; CHECK-NEXT:    unreachable
4259; CHECK:       3:
4260; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
4261; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4262; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4263; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4264; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
4265; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[_MSLD]], i32 1)
4266; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
4267; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
4268; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
4269; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
4270;
4271  %tmp1 = load <8 x i16>, ptr %A
4272  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
4273  ret <8 x i8> %tmp3
4274}
4275
4276define <4 x i16> @sqshrun4h(ptr %A) nounwind sanitize_memory {
4277; CHECK-LABEL: define <4 x i16> @sqshrun4h(
4278; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4279; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4280; CHECK-NEXT:    call void @llvm.donothing()
4281; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4282; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4283; CHECK:       2:
4284; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4285; CHECK-NEXT:    unreachable
4286; CHECK:       3:
4287; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
4288; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4289; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4290; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4291; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
4292; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[_MSLD]], i32 1)
4293; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
4294; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
4295; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
4296; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
4297;
4298  %tmp1 = load <4 x i32>, ptr %A
4299  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
4300  ret <4 x i16> %tmp3
4301}
4302
4303define <2 x i32> @sqshrun2s(ptr %A) nounwind sanitize_memory {
4304; CHECK-LABEL: define <2 x i32> @sqshrun2s(
4305; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4306; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4307; CHECK-NEXT:    call void @llvm.donothing()
4308; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4309; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4310; CHECK:       2:
4311; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4312; CHECK-NEXT:    unreachable
4313; CHECK:       3:
4314; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
4315; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4316; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4317; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4318; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
4319; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[_MSLD]], i32 1)
4320; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
4321; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
4322; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
4323; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
4324;
4325  %tmp1 = load <2 x i64>, ptr %A
4326  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
4327  ret <2 x i32> %tmp3
4328}
4329
4330define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory {
4331; CHECK-LABEL: define <16 x i8> @sqshrun16b(
4332; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4333; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4334; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4335; CHECK-NEXT:    call void @llvm.donothing()
4336; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4337; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4338; CHECK:       3:
4339; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4340; CHECK-NEXT:    unreachable
4341; CHECK:       4:
4342; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
4343; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4344; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4345; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4346; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
4347; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4348; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4349; CHECK:       8:
4350; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4351; CHECK-NEXT:    unreachable
4352; CHECK:       9:
4353; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
4354; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4355; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4356; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4357; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
4358; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[_MSLD1]], i32 1)
4359; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
4360; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
4361; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4362; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4363; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4364; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
4365;
4366  %out = load <8 x i8>, ptr %ret
4367  %tmp1 = load <8 x i16>, ptr %A
4368  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
4369  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4370  ret <16 x i8> %tmp4
4371}
4372
4373define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory {
4374; CHECK-LABEL: define <8 x i16> @sqshrun8h(
4375; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4376; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4377; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4378; CHECK-NEXT:    call void @llvm.donothing()
4379; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4380; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4381; CHECK:       3:
4382; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4383; CHECK-NEXT:    unreachable
4384; CHECK:       4:
4385; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
4386; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4387; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4388; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4389; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
4390; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4391; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4392; CHECK:       8:
4393; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4394; CHECK-NEXT:    unreachable
4395; CHECK:       9:
4396; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
4397; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4398; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4399; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4400; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
4401; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[_MSLD1]], i32 1)
4402; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
4403; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
4404; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4405; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4406; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4407; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
4408;
4409  %out = load <4 x i16>, ptr %ret
4410  %tmp1 = load <4 x i32>, ptr %A
4411  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
4412  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4413  ret <8 x i16> %tmp4
4414}
4415
4416define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory {
4417; CHECK-LABEL: define <4 x i32> @sqshrun4s(
4418; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4419; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4420; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4421; CHECK-NEXT:    call void @llvm.donothing()
4422; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4423; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4424; CHECK:       3:
4425; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4426; CHECK-NEXT:    unreachable
4427; CHECK:       4:
4428; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
4429; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4430; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4431; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4432; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
4433; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4434; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4435; CHECK:       8:
4436; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4437; CHECK-NEXT:    unreachable
4438; CHECK:       9:
4439; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
4440; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4441; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4442; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4443; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
4444; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[_MSLD1]], i32 1)
4445; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
4446; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
4447; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4448; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4449; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4450; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
4451;
4452  %out = load <2 x i32>, ptr %ret
4453  %tmp1 = load <2 x i64>, ptr %A
4454  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
4455  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4456  ret <4 x i32> %tmp4
4457}
4458
4459declare i32  @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone
4460declare <8 x i8>  @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone
4461declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
4462declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
4463
4464define i32 @sqrshrn1s(i64 %A) nounwind sanitize_memory {
4465; CHECK-LABEL: define i32 @sqrshrn1s(
4466; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
4467; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4468; CHECK-NEXT:    call void @llvm.donothing()
4469; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 [[TMP1]], i32 1)
4470; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
4471; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 [[A]], i32 1)
4472; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
4473; CHECK-NEXT:    ret i32 [[TMP]]
4474;
4475  %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1)
4476  ret i32 %tmp
4477}
4478
4479define <8 x i8> @sqrshrn8b(ptr %A) nounwind sanitize_memory {
4480; CHECK-LABEL: define <8 x i8> @sqrshrn8b(
4481; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4482; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4483; CHECK-NEXT:    call void @llvm.donothing()
4484; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4485; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4486; CHECK:       2:
4487; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4488; CHECK-NEXT:    unreachable
4489; CHECK:       3:
4490; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
4491; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4492; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4493; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4494; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
4495; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[_MSLD]], i32 1)
4496; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
4497; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
4498; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
4499; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
4500;
4501  %tmp1 = load <8 x i16>, ptr %A
4502  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
4503  ret <8 x i8> %tmp3
4504}
4505
4506define <4 x i16> @sqrshrn4h(ptr %A) nounwind sanitize_memory {
4507; CHECK-LABEL: define <4 x i16> @sqrshrn4h(
4508; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4509; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4510; CHECK-NEXT:    call void @llvm.donothing()
4511; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4512; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4513; CHECK:       2:
4514; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4515; CHECK-NEXT:    unreachable
4516; CHECK:       3:
4517; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
4518; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4519; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4520; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4521; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
4522; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[_MSLD]], i32 1)
4523; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
4524; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
4525; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
4526; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
4527;
4528  %tmp1 = load <4 x i32>, ptr %A
4529  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
4530  ret <4 x i16> %tmp3
4531}
4532
4533define <2 x i32> @sqrshrn2s(ptr %A) nounwind sanitize_memory {
4534; CHECK-LABEL: define <2 x i32> @sqrshrn2s(
4535; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4536; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4537; CHECK-NEXT:    call void @llvm.donothing()
4538; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4539; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4540; CHECK:       2:
4541; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4542; CHECK-NEXT:    unreachable
4543; CHECK:       3:
4544; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
4545; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4546; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4547; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4548; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
4549; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[_MSLD]], i32 1)
4550; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
4551; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
4552; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
4553; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
4554;
4555  %tmp1 = load <2 x i64>, ptr %A
4556  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
4557  ret <2 x i32> %tmp3
4558}
4559
4560define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
4561; CHECK-LABEL: define <16 x i8> @sqrshrn16b(
4562; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4563; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4564; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4565; CHECK-NEXT:    call void @llvm.donothing()
4566; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4567; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4568; CHECK:       3:
4569; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4570; CHECK-NEXT:    unreachable
4571; CHECK:       4:
4572; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
4573; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4574; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4575; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4576; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
4577; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4578; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4579; CHECK:       8:
4580; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4581; CHECK-NEXT:    unreachable
4582; CHECK:       9:
4583; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
4584; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4585; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4586; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4587; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
4588; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[_MSLD1]], i32 1)
4589; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
4590; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
4591; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4592; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4593; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4594; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
4595;
4596  %out = load <8 x i8>, ptr %ret
4597  %tmp1 = load <8 x i16>, ptr %A
4598  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
4599  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4600  ret <16 x i8> %tmp4
4601}
4602
4603define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
4604; CHECK-LABEL: define <8 x i16> @sqrshrn8h(
4605; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4606; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4607; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4608; CHECK-NEXT:    call void @llvm.donothing()
4609; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4610; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4611; CHECK:       3:
4612; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4613; CHECK-NEXT:    unreachable
4614; CHECK:       4:
4615; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
4616; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4617; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4618; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4619; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
4620; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4621; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4622; CHECK:       8:
4623; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4624; CHECK-NEXT:    unreachable
4625; CHECK:       9:
4626; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
4627; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4628; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4629; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4630; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
4631; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[_MSLD1]], i32 1)
4632; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
4633; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
4634; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4635; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4636; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4637; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
4638;
4639  %out = load <4 x i16>, ptr %ret
4640  %tmp1 = load <4 x i32>, ptr %A
4641  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
4642  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4643  ret <8 x i16> %tmp4
4644}
4645
4646define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
4647; CHECK-LABEL: define <4 x i32> @sqrshrn4s(
4648; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4649; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4650; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4651; CHECK-NEXT:    call void @llvm.donothing()
4652; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4653; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4654; CHECK:       3:
4655; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4656; CHECK-NEXT:    unreachable
4657; CHECK:       4:
4658; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
4659; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4660; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4661; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4662; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
4663; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4664; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4665; CHECK:       8:
4666; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4667; CHECK-NEXT:    unreachable
4668; CHECK:       9:
4669; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
4670; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4671; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4672; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4673; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
4674; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[_MSLD1]], i32 1)
4675; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
4676; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
4677; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4678; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4679; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4680; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
4681;
4682  %out = load <2 x i32>, ptr %ret
4683  %tmp1 = load <2 x i64>, ptr %A
4684  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
4685  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4686  ret <4 x i32> %tmp4
4687}
4688
4689declare i32  @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone
4690declare <8 x i8>  @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
4691declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
4692declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
4693
4694define i32 @sqrshrun1s(i64 %A) nounwind sanitize_memory {
4695; CHECK-LABEL: define i32 @sqrshrun1s(
4696; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
4697; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4698; CHECK-NEXT:    call void @llvm.donothing()
4699; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 [[TMP1]], i32 1)
4700; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
4701; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 [[A]], i32 1)
4702; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
4703; CHECK-NEXT:    ret i32 [[TMP]]
4704;
4705  %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1)
4706  ret i32 %tmp
4707}
4708
4709define <8 x i8> @sqrshrun8b(ptr %A) nounwind sanitize_memory {
4710; CHECK-LABEL: define <8 x i8> @sqrshrun8b(
4711; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4712; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4713; CHECK-NEXT:    call void @llvm.donothing()
4714; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4715; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4716; CHECK:       2:
4717; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4718; CHECK-NEXT:    unreachable
4719; CHECK:       3:
4720; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
4721; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4722; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4723; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4724; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
4725; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[_MSLD]], i32 1)
4726; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
4727; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
4728; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
4729; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
4730;
4731  %tmp1 = load <8 x i16>, ptr %A
4732  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
4733  ret <8 x i8> %tmp3
4734}
4735
4736define <4 x i16> @sqrshrun4h(ptr %A) nounwind sanitize_memory {
4737; CHECK-LABEL: define <4 x i16> @sqrshrun4h(
4738; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4739; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4740; CHECK-NEXT:    call void @llvm.donothing()
4741; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4742; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4743; CHECK:       2:
4744; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4745; CHECK-NEXT:    unreachable
4746; CHECK:       3:
4747; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
4748; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4749; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4750; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4751; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
4752; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[_MSLD]], i32 1)
4753; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
4754; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
4755; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
4756; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
4757;
4758  %tmp1 = load <4 x i32>, ptr %A
4759  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
4760  ret <4 x i16> %tmp3
4761}
4762
4763define <2 x i32> @sqrshrun2s(ptr %A) nounwind sanitize_memory {
4764; CHECK-LABEL: define <2 x i32> @sqrshrun2s(
4765; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4766; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4767; CHECK-NEXT:    call void @llvm.donothing()
4768; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4769; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4770; CHECK:       2:
4771; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4772; CHECK-NEXT:    unreachable
4773; CHECK:       3:
4774; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
4775; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4776; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4777; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4778; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
4779; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[_MSLD]], i32 1)
4780; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
4781; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
4782; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
4783; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
4784;
4785  %tmp1 = load <2 x i64>, ptr %A
4786  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
4787  ret <2 x i32> %tmp3
4788}
4789
4790define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory {
4791; CHECK-LABEL: define <16 x i8> @sqrshrun16b(
4792; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4793; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4794; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4795; CHECK-NEXT:    call void @llvm.donothing()
4796; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4797; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4798; CHECK:       3:
4799; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4800; CHECK-NEXT:    unreachable
4801; CHECK:       4:
4802; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
4803; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4804; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4805; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4806; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
4807; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4808; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4809; CHECK:       8:
4810; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4811; CHECK-NEXT:    unreachable
4812; CHECK:       9:
4813; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
4814; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4815; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4816; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4817; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
4818; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[_MSLD1]], i32 1)
4819; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
4820; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
4821; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4822; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4823; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4824; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
4825;
4826  %out = load <8 x i8>, ptr %ret
4827  %tmp1 = load <8 x i16>, ptr %A
4828  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
4829  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
4830  ret <16 x i8> %tmp4
4831}
4832
4833define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory {
4834; CHECK-LABEL: define <8 x i16> @sqrshrun8h(
4835; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4836; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4837; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4838; CHECK-NEXT:    call void @llvm.donothing()
4839; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4840; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4841; CHECK:       3:
4842; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4843; CHECK-NEXT:    unreachable
4844; CHECK:       4:
4845; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
4846; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4847; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4848; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4849; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
4850; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4851; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4852; CHECK:       8:
4853; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4854; CHECK-NEXT:    unreachable
4855; CHECK:       9:
4856; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
4857; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4858; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4859; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4860; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
4861; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[_MSLD1]], i32 1)
4862; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
4863; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
4864; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4865; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4866; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4867; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
4868;
4869  %out = load <4 x i16>, ptr %ret
4870  %tmp1 = load <4 x i32>, ptr %A
4871  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
4872  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
4873  ret <8 x i16> %tmp4
4874}
4875
4876define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory {
4877; CHECK-LABEL: define <4 x i32> @sqrshrun4s(
4878; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
4879; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4880; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
4881; CHECK-NEXT:    call void @llvm.donothing()
4882; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4883; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
4884; CHECK:       3:
4885; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4886; CHECK-NEXT:    unreachable
4887; CHECK:       4:
4888; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
4889; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
4890; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
4891; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
4892; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
4893; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
4894; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
4895; CHECK:       8:
4896; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4897; CHECK-NEXT:    unreachable
4898; CHECK:       9:
4899; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
4900; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
4901; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
4902; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
4903; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
4904; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[_MSLD1]], i32 1)
4905; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
4906; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
4907; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4908; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4909; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
4910; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
4911;
4912  %out = load <2 x i32>, ptr %ret
4913  %tmp1 = load <2 x i64>, ptr %A
4914  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
4915  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4916  ret <4 x i32> %tmp4
4917}
4918
4919declare i32  @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone
4920declare <8 x i8>  @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone
4921declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
4922declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
4923
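; UQRSHRN (unsigned saturating rounded shift right narrow) tests. Shadow
; propagation follows the same pattern as the signed narrowing shifts above.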
4924define i32 @uqrshrn1s(i64 %A) nounwind sanitize_memory {
4925; CHECK-LABEL: define i32 @uqrshrn1s(
4926; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
4927; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4928; CHECK-NEXT:    call void @llvm.donothing()
4929; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 [[TMP1]], i32 1)
4930; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
4931; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 [[A]], i32 1)
4932; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
4933; CHECK-NEXT:    ret i32 [[TMP]]
4934;
4935  %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1)
4936  ret i32 %tmp
4937}
4938
4939define <8 x i8> @uqrshrn8b(ptr %A) nounwind sanitize_memory {
4940; CHECK-LABEL: define <8 x i8> @uqrshrn8b(
4941; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4942; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4943; CHECK-NEXT:    call void @llvm.donothing()
4944; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4945; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4946; CHECK:       2:
4947; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4948; CHECK-NEXT:    unreachable
4949; CHECK:       3:
4950; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
4951; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4952; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4953; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4954; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
4955; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[_MSLD]], i32 1)
4956; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
4957; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
4958; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
4959; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
4960;
4961  %tmp1 = load <8 x i16>, ptr %A
4962  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
4963  ret <8 x i8> %tmp3
4964}
4965
4966define <4 x i16> @uqrshrn4h(ptr %A) nounwind sanitize_memory {
4967; CHECK-LABEL: define <4 x i16> @uqrshrn4h(
4968; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4969; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4970; CHECK-NEXT:    call void @llvm.donothing()
4971; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4972; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
4973; CHECK:       2:
4974; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
4975; CHECK-NEXT:    unreachable
4976; CHECK:       3:
4977; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
4978; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
4979; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
4980; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
4981; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
4982; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[_MSLD]], i32 1)
4983; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
4984; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
4985; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
4986; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
4987;
4988  %tmp1 = load <4 x i32>, ptr %A
4989  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
4990  ret <4 x i16> %tmp3
4991}
4992
4993define <2 x i32> @uqrshrn2s(ptr %A) nounwind sanitize_memory {
4994; CHECK-LABEL: define <2 x i32> @uqrshrn2s(
4995; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
4996; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
4997; CHECK-NEXT:    call void @llvm.donothing()
4998; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
4999; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5000; CHECK:       2:
5001; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5002; CHECK-NEXT:    unreachable
5003; CHECK:       3:
5004; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
5005; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5006; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5007; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5008; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
5009; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[_MSLD]], i32 1)
5010; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
5011; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
5012; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
5013; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
5014;
5015  %tmp1 = load <2 x i64>, ptr %A
5016  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
5017  ret <2 x i32> %tmp3
5018}
5019
5020define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
5021; CHECK-LABEL: define <16 x i8> @uqrshrn16b(
5022; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
5023; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5024; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
5025; CHECK-NEXT:    call void @llvm.donothing()
5026; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5027; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
5028; CHECK:       3:
5029; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5030; CHECK-NEXT:    unreachable
5031; CHECK:       4:
5032; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
5033; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
5034; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
5035; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
5036; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
5037; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
5038; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5039; CHECK:       8:
5040; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5041; CHECK-NEXT:    unreachable
5042; CHECK:       9:
5043; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
5044; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
5045; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
5046; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
5047; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
5048; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[_MSLD1]], i32 1)
5049; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
5050; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
5051; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5052; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5053; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
5054; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
5055;
5056  %out = load <8 x i8>, ptr %ret
5057  %tmp1 = load <8 x i16>, ptr %A
5058  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
5059  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5060  ret <16 x i8> %tmp4
5061}
5062
5063define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
5064; CHECK-LABEL: define <8 x i16> @uqrshrn8h(
5065; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
5066; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5067; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
5068; CHECK-NEXT:    call void @llvm.donothing()
5069; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5070; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
5071; CHECK:       3:
5072; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5073; CHECK-NEXT:    unreachable
5074; CHECK:       4:
5075; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
5076; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
5077; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
5078; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
5079; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
5080; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
5081; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5082; CHECK:       8:
5083; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5084; CHECK-NEXT:    unreachable
5085; CHECK:       9:
5086; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
5087; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
5088; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
5089; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
5090; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
5091; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[_MSLD1]], i32 1)
5092; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
5093; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
5094; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5095; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5096; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
5097; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
5098;
5099  %out = load <4 x i16>, ptr %ret
5100  %tmp1 = load <4 x i32>, ptr %A
5101  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
5102  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5103  ret <8 x i16> %tmp4
5104}
5105
5106define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
5107; CHECK-LABEL: define <4 x i32> @uqrshrn4s(
5108; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
5109; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5110; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
5111; CHECK-NEXT:    call void @llvm.donothing()
5112; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5113; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
5114; CHECK:       3:
5115; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5116; CHECK-NEXT:    unreachable
5117; CHECK:       4:
5118; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
5119; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
5120; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
5121; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
5122; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
5123; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
5124; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5125; CHECK:       8:
5126; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5127; CHECK-NEXT:    unreachable
5128; CHECK:       9:
5129; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
5130; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
5131; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
5132; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
5133; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
5134; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[_MSLD1]], i32 1)
5135; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
5136; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
5137; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5138; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5139; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
5140; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
5141;
5142  %out = load <2 x i32>, ptr %ret
5143  %tmp1 = load <2 x i64>, ptr %A
5144  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
5145  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5146  ret <4 x i32> %tmp4
5147}
5148
5149declare i32  @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone
5150declare <8 x i8>  @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone
5151declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
5152declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
5153
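; UQSHRN (unsigned saturating shift right narrow, non-rounding) tests.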
5154define i32 @uqshrn1s(i64 %A) nounwind sanitize_memory {
5155; CHECK-LABEL: define i32 @uqshrn1s(
5156; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
5157; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5158; CHECK-NEXT:    call void @llvm.donothing()
5159; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 [[TMP1]], i32 1)
5160; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[TMP2]], 0
5161; CHECK-NEXT:    [[TMP:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 [[A]], i32 1)
5162; CHECK-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
5163; CHECK-NEXT:    ret i32 [[TMP]]
5164;
5165  %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1)
5166  ret i32 %tmp
5167}
5168
5169define <8 x i8> @uqshrn8b(ptr %A) nounwind sanitize_memory {
5170; CHECK-LABEL: define <8 x i8> @uqshrn8b(
5171; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5172; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5173; CHECK-NEXT:    call void @llvm.donothing()
5174; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5175; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5176; CHECK:       2:
5177; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5178; CHECK-NEXT:    unreachable
5179; CHECK:       3:
5180; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
5181; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5182; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5183; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5184; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
5185; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[_MSLD]], i32 1)
5186; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
5187; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
5188; CHECK-NEXT:    store <8 x i8> [[TMP8]], ptr @__msan_retval_tls, align 8
5189; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
5190;
5191  %tmp1 = load <8 x i16>, ptr %A
5192  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
5193  ret <8 x i8> %tmp3
5194}
5195
5196define <4 x i16> @uqshrn4h(ptr %A) nounwind sanitize_memory {
5197; CHECK-LABEL: define <4 x i16> @uqshrn4h(
5198; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5199; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5200; CHECK-NEXT:    call void @llvm.donothing()
5201; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5202; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5203; CHECK:       2:
5204; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5205; CHECK-NEXT:    unreachable
5206; CHECK:       3:
5207; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
5208; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5209; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5210; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5211; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
5212; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[_MSLD]], i32 1)
5213; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
5214; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
5215; CHECK-NEXT:    store <4 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
5216; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
5217;
5218  %tmp1 = load <4 x i32>, ptr %A
5219  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
5220  ret <4 x i16> %tmp3
5221}
5222
5223define <2 x i32> @uqshrn2s(ptr %A) nounwind sanitize_memory {
5224; CHECK-LABEL: define <2 x i32> @uqshrn2s(
5225; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5226; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5227; CHECK-NEXT:    call void @llvm.donothing()
5228; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5229; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5230; CHECK:       2:
5231; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5232; CHECK-NEXT:    unreachable
5233; CHECK:       3:
5234; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
5235; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5236; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5237; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5238; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
5239; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[_MSLD]], i32 1)
5240; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
5241; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
5242; CHECK-NEXT:    store <2 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
5243; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
5244;
5245  %tmp1 = load <2 x i64>, ptr %A
5246  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
5247  ret <2 x i32> %tmp3
5248}
5249
5250define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
5251; CHECK-LABEL: define <16 x i8> @uqshrn16b(
5252; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
5253; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5254; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
5255; CHECK-NEXT:    call void @llvm.donothing()
5256; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5257; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
5258; CHECK:       3:
5259; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5260; CHECK-NEXT:    unreachable
5261; CHECK:       4:
5262; CHECK-NEXT:    [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
5263; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
5264; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
5265; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
5266; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
5267; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
5268; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5269; CHECK:       8:
5270; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5271; CHECK-NEXT:    unreachable
5272; CHECK:       9:
5273; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
5274; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
5275; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
5276; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
5277; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
5278; CHECK-NEXT:    [[TMP13:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[_MSLD1]], i32 1)
5279; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
5280; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
5281; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5282; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5283; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
5284; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
5285;
5286  %out = load <8 x i8>, ptr %ret
5287  %tmp1 = load <8 x i16>, ptr %A
5288  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
5289  %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5290  ret <16 x i8> %tmp4
5291}
5292
5293define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
5294; CHECK-LABEL: define <8 x i16> @uqshrn8h(
5295; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
5296; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5297; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
5298; CHECK-NEXT:    call void @llvm.donothing()
5299; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5300; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
5301; CHECK:       3:
5302; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5303; CHECK-NEXT:    unreachable
5304; CHECK:       4:
5305; CHECK-NEXT:    [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
5306; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
5307; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
5308; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
5309; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
5310; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
5311; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5312; CHECK:       8:
5313; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5314; CHECK-NEXT:    unreachable
5315; CHECK:       9:
5316; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
5317; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
5318; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
5319; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
5320; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
5321; CHECK-NEXT:    [[TMP13:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[_MSLD1]], i32 1)
5322; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
5323; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
5324; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5325; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5326; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
5327; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
5328;
5329  %out = load <4 x i16>, ptr %ret
5330  %tmp1 = load <4 x i32>, ptr %A
5331  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
5332  %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
5333  ret <8 x i16> %tmp4
5334}
5335
5336define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
5337; CHECK-LABEL: define <4 x i32> @uqshrn4s(
5338; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
5339; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5340; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
5341; CHECK-NEXT:    call void @llvm.donothing()
5342; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5343; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
5344; CHECK:       3:
5345; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5346; CHECK-NEXT:    unreachable
5347; CHECK:       4:
5348; CHECK-NEXT:    [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
5349; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
5350; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
5351; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
5352; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
5353; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
5354; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
5355; CHECK:       8:
5356; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5357; CHECK-NEXT:    unreachable
5358; CHECK:       9:
5359; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
5360; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
5361; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
5362; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
5363; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
5364; CHECK-NEXT:    [[TMP13:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[_MSLD1]], i32 1)
5365; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
5366; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
5367; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5368; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5369; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
5370; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
5371;
5372  %out = load <2 x i32>, ptr %ret
5373  %tmp1 = load <2 x i64>, ptr %A
5374  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
5375  %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5376  ret <4 x i32> %tmp4
5377}
5378
5379declare i32  @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone
5380declare <8 x i8>  @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone
5381declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone
5382declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone
5383
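; USHLL (unsigned shift left long) tests, expressed in IR as zext followed by
; shl; the shadow is widened and shifted the same way as the value.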
5384define <8 x i16> @ushll8h(ptr %A) nounwind sanitize_memory {
5385; CHECK-LABEL: define <8 x i16> @ushll8h(
5386; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5387; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5388; CHECK-NEXT:    call void @llvm.donothing()
5389; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5390; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5391; CHECK:       2:
5392; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5393; CHECK-NEXT:    unreachable
5394; CHECK:       3:
5395; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
5396; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5397; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5398; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5399; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
5400; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16>
5401; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
5402; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 1)
5403; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
5404; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1)
5405; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
5406; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
5407;
5408  %tmp1 = load <8 x i8>, ptr %A
5409  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
5410  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
5411  ret <8 x i16> %tmp3
5412}
5413
5414define <4 x i32> @ushll4s(ptr %A) nounwind sanitize_memory {
5415; CHECK-LABEL: define <4 x i32> @ushll4s(
5416; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5417; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5418; CHECK-NEXT:    call void @llvm.donothing()
5419; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5420; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5421; CHECK:       2:
5422; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5423; CHECK-NEXT:    unreachable
5424; CHECK:       3:
5425; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
5426; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5427; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5428; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5429; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
5430; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
5431; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
5432; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP]], splat (i32 1)
5433; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
5434; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1)
5435; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
5436; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
5437;
5438  %tmp1 = load <4 x i16>, ptr %A
5439  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
5440  %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
5441  ret <4 x i32> %tmp3
5442}
5443
5444define <2 x i64> @ushll2d(ptr %A) nounwind sanitize_memory {
5445; CHECK-LABEL: define <2 x i64> @ushll2d(
5446; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5447; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5448; CHECK-NEXT:    call void @llvm.donothing()
5449; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5450; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5451; CHECK:       2:
5452; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5453; CHECK-NEXT:    unreachable
5454; CHECK:       3:
5455; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
5456; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5457; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5458; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5459; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
5460; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64>
5461; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
5462; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], splat (i64 1)
5463; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
5464; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
5465; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
5466; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
5467;
5468  %tmp1 = load <2 x i32>, ptr %A
5469  %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
5470  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
5471  ret <2 x i64> %tmp3
5472}
5473
5474define <8 x i16> @ushll2_8h(ptr %A) nounwind sanitize_memory {
5475; CHECK-LABEL: define <8 x i16> @ushll2_8h(
5476; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5477; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5478; CHECK-NEXT:    call void @llvm.donothing()
5479; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5480; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5481; CHECK:       2:
5482; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5483; CHECK-NEXT:    unreachable
5484; CHECK:       3:
5485; CHECK-NEXT:    [[LOAD1:%.*]] = load <16 x i8>, ptr [[A]], align 16
5486; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5487; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5488; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5489; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
5490; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1), <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5491; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5492; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <8 x i8> [[_MSPROP]] to <8 x i16>
5493; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
5494; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], splat (i16 1)
5495; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
5496; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1)
5497; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
5498; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
5499;
5500  %load1 = load <16 x i8>, ptr %A
5501  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5502  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
5503  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
5504  ret <8 x i16> %tmp3
5505}
5506
5507define <4 x i32> @ushll2_4s(ptr %A) nounwind sanitize_memory {
5508; CHECK-LABEL: define <4 x i32> @ushll2_4s(
5509; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5510; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5511; CHECK-NEXT:    call void @llvm.donothing()
5512; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5513; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5514; CHECK:       2:
5515; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5516; CHECK-NEXT:    unreachable
5517; CHECK:       3:
5518; CHECK-NEXT:    [[LOAD1:%.*]] = load <8 x i16>, ptr [[A]], align 16
5519; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5520; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5521; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5522; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
5523; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7>
5524; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
5525; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32>
5526; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
5527; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 1)
5528; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
5529; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1)
5530; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
5531; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
5532;
5533  %load1 = load <8 x i16>, ptr %A
5534  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
5535  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
5536  %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
5537  ret <4 x i32> %tmp3
5538}
5539
5540define <2 x i64> @ushll2_2d(ptr %A) nounwind sanitize_memory {
5541; CHECK-LABEL: define <2 x i64> @ushll2_2d(
5542; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5543; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5544; CHECK-NEXT:    call void @llvm.donothing()
5545; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5546; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5547; CHECK:       2:
5548; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5549; CHECK-NEXT:    unreachable
5550; CHECK:       3:
5551; CHECK-NEXT:    [[LOAD1:%.*]] = load <4 x i32>, ptr [[A]], align 16
5552; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5553; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5554; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5555; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
5556; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3>
5557; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
5558; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <2 x i32> [[_MSPROP]] to <2 x i64>
5559; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
5560; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], splat (i64 1)
5561; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
5562; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
5563; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
5564; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
5565;
5566  %load1 = load <4 x i32>, ptr %A
5567  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
5568  %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
5569  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
5570  ret <2 x i64> %tmp3
5571}
5572
5573declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>)
5574declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>)
5575declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>)
5576declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>)
5577declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>)
5578declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64)
5579
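; USHL intrinsic tests. With a constant shift vector the shadow is shifted by
; the same amount through the intrinsic; with a variable shift, the shift
; operand's shadow is collapsed to an all-ones or all-zero mask that is ORed
; into the result shadow.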
5580define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind sanitize_memory {
5581; CHECK-LABEL: define <8 x i16> @neon.ushll8h_constant_shift(
5582; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5583; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5584; CHECK-NEXT:    call void @llvm.donothing()
5585; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5586; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5587; CHECK:       2:
5588; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5589; CHECK-NEXT:    unreachable
5590; CHECK:       3:
5591; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
5592; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5593; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5594; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5595; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
5596; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16>
5597; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
5598; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> splat (i16 1))
5599; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
5600; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> splat (i16 1))
5601; CHECK-NEXT:    store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
5602; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
5603;
5604  %tmp1 = load <8 x i8>, ptr %A
5605  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
5606  %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
5607  ret <8 x i16> %tmp3
5608}
5609
5610define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind sanitize_memory {
5611; CHECK-LABEL: define <8 x i16> @neon.ushl8h_no_constant_shift(
5612; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5613; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5614; CHECK-NEXT:    call void @llvm.donothing()
5615; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5616; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5617; CHECK:       2:
5618; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5619; CHECK-NEXT:    unreachable
5620; CHECK:       3:
5621; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
5622; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5623; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5624; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5625; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
5626; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16>
5627; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
5628; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i16> [[_MSPROP]] to i128
5629; CHECK-NEXT:    [[TMP8:%.*]] = trunc i128 [[TMP7]] to i64
5630; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
5631; CHECK-NEXT:    [[TMP10:%.*]] = sext i1 [[TMP9]] to i128
5632; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i128 [[TMP10]] to <8 x i16>
5633; CHECK-NEXT:    [[TMP12:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> [[TMP2]])
5634; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i16> [[TMP12]], [[TMP11]]
5635; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> [[TMP2]])
5636; CHECK-NEXT:    store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
5637; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
5638;
5639  %tmp1 = load <8 x i8>, ptr %A
5640  %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
5641  %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp2)
5642  ret <8 x i16> %tmp3
5643}
5644
5645define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind sanitize_memory {
5646; CHECK-LABEL: define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(
5647; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5648; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5649; CHECK-NEXT:    call void @llvm.donothing()
5650; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5651; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5652; CHECK:       2:
5653; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5654; CHECK-NEXT:    unreachable
5655; CHECK:       3:
5656; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[A]], align 4
5657; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5658; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5659; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5660; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4
5661; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i8> [[_MSLD]] to <4 x i32>
5662; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
5663; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1))
5664; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
5665; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1))
5666; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
5667; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
5668;
5669  %tmp1 = load <4 x i8>, ptr %A
5670  %tmp2 = zext <4 x i8> %tmp1 to <4 x i32>
5671  %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
5672  ret <4 x i32> %tmp3
5673}
5674
5675define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind sanitize_memory {
5676; CHECK-LABEL: define <8 x i16> @neon.ushl8_noext_constant_shift(
5677; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5678; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5679; CHECK-NEXT:    call void @llvm.donothing()
5680; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5681; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5682; CHECK:       2:
5683; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5684; CHECK-NEXT:    unreachable
5685; CHECK:       3:
5686; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
5687; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5688; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5689; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5690; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
5691; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1))
5692; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
5693; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1))
5694; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
5695; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
5696;
5697  %tmp1 = load <8 x i16>, ptr %A
5698  %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
5699  ret <8 x i16> %tmp3
5700}
5701
5702define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind sanitize_memory {
5703; CHECK-LABEL: define <4 x i32> @neon.ushll4s_constant_shift(
5704; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5705; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5706; CHECK-NEXT:    call void @llvm.donothing()
5707; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5708; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5709; CHECK:       2:
5710; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5711; CHECK-NEXT:    unreachable
5712; CHECK:       3:
5713; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
5714; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5715; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5716; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5717; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
5718; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
5719; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
5720; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1))
5721; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
5722; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1))
5723; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
5724; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
5725;
5726  %tmp1 = load <4 x i16>, ptr %A
5727  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
5728  %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
5729  ret <4 x i32> %tmp3
5730}
5731
5732; FIXME: unnecessary ushll.4s v0, v0, #0?
5733define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind sanitize_memory {
5734; CHECK-LABEL: define <4 x i32> @neon.ushll4s_neg_constant_shift(
5735; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5736; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5737; CHECK-NEXT:    call void @llvm.donothing()
5738; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5739; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5740; CHECK:       2:
5741; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5742; CHECK-NEXT:    unreachable
5743; CHECK:       3:
5744; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
5745; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5746; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5747; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5748; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
5749; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
5750; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
5751; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 -1))
5752; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
5753; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 -1))
5754; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
5755; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
5756;
5757  %tmp1 = load <4 x i16>, ptr %A
5758  %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
5759  %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
5760  ret <4 x i32> %tmp3
5761}
5762
5763; FIXME: should be constant folded.
5764define <4 x i32> @neon.ushll4s_constant_fold() nounwind sanitize_memory {
5765; CHECK-LABEL: define <4 x i32> @neon.ushll4s_constant_fold(
5766; CHECK-SAME: ) #[[ATTR0]] {
5767; CHECK-NEXT:    call void @llvm.donothing()
5768; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> zeroinitializer, <4 x i32> splat (i32 1))
5769; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
5770; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> splat (i32 1))
5771; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
5772; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
5773;
5774  %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
5775  ret <4 x i32> %tmp3
5776}
5777
5778define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind sanitize_memory {
5779; CHECK-LABEL: define <2 x i64> @neon.ushll2d_constant_shift(
5780; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5781; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5782; CHECK-NEXT:    call void @llvm.donothing()
5783; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5784; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5785; CHECK:       2:
5786; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5787; CHECK-NEXT:    unreachable
5788; CHECK:       3:
5789; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
5790; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5791; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5792; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5793; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
5794; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64>
5795; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
5796; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> splat (i64 1))
5797; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
5798; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 1))
5799; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
5800; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
5801;
5802  %tmp1 = load <2 x i32>, ptr %A
5803  %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
5804  %tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
5805  ret <2 x i64> %tmp3
5806}
5807
5808define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind sanitize_memory {
5809; CHECK-LABEL: define <1 x i64> @neon.ushl_vscalar_constant_shift(
5810; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5811; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5812; CHECK-NEXT:    call void @llvm.donothing()
5813; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5814; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5815; CHECK:       2:
5816; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5817; CHECK-NEXT:    unreachable
5818; CHECK:       3:
5819; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i32>, ptr [[A]], align 4
5820; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5821; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5822; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5823; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4
5824; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64>
5825; CHECK-NEXT:    [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64>
5826; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> splat (i64 1))
5827; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
5828; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 1))
5829; CHECK-NEXT:    store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
5830; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
5831;
5832  %tmp1 = load <1 x i32>, ptr %A
5833  %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
5834  %tmp3 = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
5835  ret <1 x i64> %tmp3
5836}
5837
5838define i64 @neon.ushl_scalar_constant_shift(ptr %A) nounwind sanitize_memory {
5839; CHECK-LABEL: define i64 @neon.ushl_scalar_constant_shift(
5840; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5841; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5842; CHECK-NEXT:    call void @llvm.donothing()
5843; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5844; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5845; CHECK:       2:
5846; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5847; CHECK-NEXT:    unreachable
5848; CHECK:       3:
5849; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
5850; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5851; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5852; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5853; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
5854; CHECK-NEXT:    [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64
5855; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
5856; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 [[_MSPROP]], i64 1)
5857; CHECK-NEXT:    [[_MSPROP1:%.*]] = or i64 [[TMP7]], 0
5858; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 [[TMP2]], i64 1)
5859; CHECK-NEXT:    store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
5860; CHECK-NEXT:    ret i64 [[TMP3]]
5861;
5862  %tmp1 = load i32, ptr %A
5863  %tmp2 = zext i32 %tmp1 to i64
5864  %tmp3 = call i64 @llvm.aarch64.neon.ushl.i64(i64 %tmp2, i64 1)
5865  ret i64 %tmp3
5866}
5867
5868define <8 x i16> @sshll8h(ptr %A) nounwind sanitize_memory {
5869; CHECK-LABEL: define <8 x i16> @sshll8h(
5870; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5871; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5872; CHECK-NEXT:    call void @llvm.donothing()
5873; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5874; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5875; CHECK:       2:
5876; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5877; CHECK-NEXT:    unreachable
5878; CHECK:       3:
5879; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
5880; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5881; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5882; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5883; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
5884; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16>
5885; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
5886; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 1)
5887; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
5888; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1)
5889; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
5890; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
5891;
5892  %tmp1 = load <8 x i8>, ptr %A
5893  %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
5894  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
5895  ret <8 x i16> %tmp3
5896}
5897
5898define <2 x i64> @sshll2d(ptr %A) nounwind sanitize_memory {
5899; CHECK-LABEL: define <2 x i64> @sshll2d(
5900; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5901; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5902; CHECK-NEXT:    call void @llvm.donothing()
5903; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5904; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5905; CHECK:       2:
5906; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5907; CHECK-NEXT:    unreachable
5908; CHECK:       3:
5909; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
5910; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5911; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5912; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5913; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
5914; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64>
5915; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
5916; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], splat (i64 1)
5917; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
5918; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
5919; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
5920; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
5921;
5922  %tmp1 = load <2 x i32>, ptr %A
5923  %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
5924  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
5925  ret <2 x i64> %tmp3
5926}
5927
5928declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>)
5929declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
5930declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
5931declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
5932declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>)
5933declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64)
5934
5935define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind sanitize_memory {
5936; CHECK-LABEL: define <16 x i8> @neon.sshl16b_constant_shift(
5937; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5938; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5939; CHECK-NEXT:    call void @llvm.donothing()
5940; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5941; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5942; CHECK:       2:
5943; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5944; CHECK-NEXT:    unreachable
5945; CHECK:       3:
5946; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
5947; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5948; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5949; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5950; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
5951; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1))
5952; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
5953; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1))
5954; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
5955; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
5956;
5957  %tmp1 = load <16 x i8>, ptr %A
5958  %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
5959  ret <16 x i8> %tmp2
5960}
5961
5962define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind sanitize_memory {
5963; CHECK-LABEL: define <16 x i8> @neon.sshl16b_non_splat_constant_shift(
5964; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5965; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5966; CHECK-NEXT:    call void @llvm.donothing()
5967; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5968; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5969; CHECK:       2:
5970; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5971; CHECK-NEXT:    unreachable
5972; CHECK:       3:
5973; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
5974; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
5975; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
5976; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
5977; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
5978; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
5979; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
5980; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
5981; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
5982; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
5983;
5984  %tmp1 = load <16 x i8>, ptr %A
5985  %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
5986  ret <16 x i8> %tmp2
5987}
5988
5989define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind sanitize_memory {
5990; CHECK-LABEL: define <16 x i8> @neon.sshl16b_neg_constant_shift(
5991; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
5992; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
5993; CHECK-NEXT:    call void @llvm.donothing()
5994; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
5995; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
5996; CHECK:       2:
5997; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
5998; CHECK-NEXT:    unreachable
5999; CHECK:       3:
6000; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
6001; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6002; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6003; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6004; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
6005; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -2))
6006; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
6007; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -2))
6008; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6009; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
6010;
6011  %tmp1 = load <16 x i8>, ptr %A
6012  %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
6013  ret <16 x i8> %tmp2
6014}
6015
6016define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind sanitize_memory {
6017; CHECK-LABEL: define <8 x i16> @neon.sshll8h_constant_shift(
6018; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6019; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6020; CHECK-NEXT:    call void @llvm.donothing()
6021; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6022; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6023; CHECK:       2:
6024; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6025; CHECK-NEXT:    unreachable
6026; CHECK:       3:
6027; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
6028; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6029; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6030; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6031; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
6032; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16>
6033; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
6034; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> splat (i16 1))
6035; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
6036; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[TMP2]], <8 x i16> splat (i16 1))
6037; CHECK-NEXT:    store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
6038; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
6039;
6040  %tmp1 = load <8 x i8>, ptr %A
6041  %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
6042  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
6043  ret <8 x i16> %tmp3
6044}
6045
6046define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind sanitize_memory {
6047; CHECK-LABEL: define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(
6048; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6049; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6050; CHECK-NEXT:    call void @llvm.donothing()
6051; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6052; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6053; CHECK:       2:
6054; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6055; CHECK-NEXT:    unreachable
6056; CHECK:       3:
6057; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[A]], align 4
6058; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6059; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6060; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6061; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4
6062; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <4 x i8> [[_MSLD]] to <4 x i32>
6063; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32>
6064; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1))
6065; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
6066; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1))
6067; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
6068; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
6069;
6070  %tmp1 = load <4 x i8>, ptr %A
6071  %tmp2 = sext <4 x i8> %tmp1 to <4 x i32>
6072  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
6073  ret <4 x i32> %tmp3
6074}
6075
6076define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind sanitize_memory {
6077; CHECK-LABEL: define <4 x i32> @neon.sshll4s_constant_shift(
6078; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6079; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6080; CHECK-NEXT:    call void @llvm.donothing()
6081; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6082; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6083; CHECK:       2:
6084; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6085; CHECK-NEXT:    unreachable
6086; CHECK:       3:
6087; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
6088; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6089; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6090; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6091; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
6092; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32>
6093; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6094; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1))
6095; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
6096; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1))
6097; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
6098; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
6099;
6100  %tmp1 = load <4 x i16>, ptr %A
6101  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
6102  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
6103  ret <4 x i32> %tmp3
6104}
6105
6106define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind sanitize_memory {
6107; CHECK-LABEL: define <4 x i32> @neon.sshll4s_neg_constant_shift(
6108; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6109; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6110; CHECK-NEXT:    call void @llvm.donothing()
6111; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6112; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6113; CHECK:       2:
6114; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6115; CHECK-NEXT:    unreachable
6116; CHECK:       3:
6117; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
6118; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6119; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6120; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6121; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
6122; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32>
6123; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6124; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 -1))
6125; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
6126; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 -1))
6127; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
6128; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
6129;
6130  %tmp1 = load <4 x i16>, ptr %A
6131  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
6132  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
6133  ret <4 x i32> %tmp3
6134}
6135
6136; FIXME: should be constant folded.
6137define <4 x i32> @neon.sshl4s_constant_fold() nounwind sanitize_memory {
6138; CHECK-LABEL: define <4 x i32> @neon.sshl4s_constant_fold(
6139; CHECK-SAME: ) #[[ATTR0]] {
6140; CHECK-NEXT:    call void @llvm.donothing()
6141; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> zeroinitializer, <4 x i32> splat (i32 2))
6142; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
6143; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> splat (i32 2))
6144; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
6145; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
6146;
6147  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
6148  ret <4 x i32> %tmp3
6149}
6150
6151define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind sanitize_memory {
6152; CHECK-LABEL: define <4 x i32> @neon.sshl4s_no_fold(
6153; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6154; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6155; CHECK-NEXT:    call void @llvm.donothing()
6156; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6157; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6158; CHECK:       2:
6159; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6160; CHECK-NEXT:    unreachable
6161; CHECK:       3:
6162; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
6163; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6164; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6165; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6166; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
6167; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1))
6168; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
6169; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1))
6170; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6171; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
6172;
6173  %tmp1 = load <4 x i32>, ptr %A
6174  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
6175  ret <4 x i32> %tmp3
6176}
6177
6178define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind sanitize_memory {
6179; CHECK-LABEL: define <2 x i64> @neon.sshll2d_constant_shift(
6180; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6181; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6182; CHECK-NEXT:    call void @llvm.donothing()
6183; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6184; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6185; CHECK:       2:
6186; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6187; CHECK-NEXT:    unreachable
6188; CHECK:       3:
6189; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
6190; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6191; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6192; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6193; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
6194; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64>
6195; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6196; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> splat (i64 1))
6197; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
6198; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 1))
6199; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
6200; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
6201;
6202  %tmp1 = load <2 x i32>, ptr %A
6203  %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
6204  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
6205  ret <2 x i64> %tmp3
6206}
6207
6208define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind sanitize_memory {
6209; CHECK-LABEL: define <1 x i64> @neon.sshll_vscalar_constant_shift(
6210; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6211; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6212; CHECK-NEXT:    call void @llvm.donothing()
6213; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6214; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6215; CHECK:       2:
6216; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6217; CHECK-NEXT:    unreachable
6218; CHECK:       3:
6219; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i32>, ptr [[A]], align 4
6220; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6221; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6222; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6223; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4
6224; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64>
6225; CHECK-NEXT:    [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64>
6226; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> splat (i64 1))
6227; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
6228; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 1))
6229; CHECK-NEXT:    store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
6230; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
6231;
6232  %tmp1 = load <1 x i32>, ptr %A
6233  %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
6234  %tmp3 = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
6235  ret <1 x i64> %tmp3
6236}
6237
6238define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind sanitize_memory {
6239; CHECK-LABEL: define i64 @neon.sshll_scalar_constant_shift(
6240; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6241; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6242; CHECK-NEXT:    call void @llvm.donothing()
6243; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6244; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6245; CHECK:       2:
6246; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6247; CHECK-NEXT:    unreachable
6248; CHECK:       3:
6249; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
6250; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6251; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6252; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6253; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
6254; CHECK-NEXT:    [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64
6255; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
6256; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[_MSPROP]], i64 1)
6257; CHECK-NEXT:    [[_MSPROP1:%.*]] = or i64 [[TMP7]], 0
6258; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[TMP2]], i64 1)
6259; CHECK-NEXT:    store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
6260; CHECK-NEXT:    ret i64 [[TMP3]]
6261;
6262  %tmp1 = load i32, ptr %A
6263  %tmp2 = zext i32 %tmp1 to i64
6264  %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 1)
6265  ret i64 %tmp3
6266}
6267
6268define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind sanitize_memory {
6269; CHECK-LABEL: define i64 @neon.sshll_scalar_constant_shift_m1(
6270; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6271; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6272; CHECK-NEXT:    call void @llvm.donothing()
6273; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6274; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6275; CHECK:       2:
6276; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6277; CHECK-NEXT:    unreachable
6278; CHECK:       3:
6279; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A]], align 4
6280; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6281; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6282; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6283; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
6284; CHECK-NEXT:    [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64
6285; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
6286; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[_MSPROP]], i64 -1)
6287; CHECK-NEXT:    [[_MSPROP1:%.*]] = or i64 [[TMP7]], 0
6288; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[TMP2]], i64 -1)
6289; CHECK-NEXT:    store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
6290; CHECK-NEXT:    ret i64 [[TMP3]]
6291;
6292  %tmp1 = load i32, ptr %A
6293  %tmp2 = zext i32 %tmp1 to i64
6294  %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 -1)
6295  ret i64 %tmp3
6296}
6297
6298; FIXME: should be constant folded.
6299define <2 x i64> @neon.sshl2d_constant_fold() nounwind sanitize_memory {
6300; CHECK-LABEL: define <2 x i64> @neon.sshl2d_constant_fold(
6301; CHECK-SAME: ) #[[ATTR0]] {
6302; CHECK-NEXT:    call void @llvm.donothing()
6303; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> zeroinitializer, <2 x i64> splat (i64 1))
6304; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
6305; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> splat (i64 1))
6306; CHECK-NEXT:    store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
6307; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
6308;
6309  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
6310  ret <2 x i64> %tmp3
6311}
6312
6313define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind sanitize_memory {
6314; CHECK-LABEL: define <2 x i64> @neon.sshl2d_no_fold(
6315; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6316; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6317; CHECK-NEXT:    call void @llvm.donothing()
6318; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6319; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6320; CHECK:       2:
6321; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6322; CHECK-NEXT:    unreachable
6323; CHECK:       3:
6324; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[A]], align 16
6325; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6326; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6327; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6328; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
6329; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 2))
6330; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
6331; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 2))
6332; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6333; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
6334;
6335  %tmp2 = load <2 x i64>, ptr %A
6336  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 2, i64 2>)
6337  ret <2 x i64> %tmp3
6338}
6339
6340define <8 x i16> @sshll2_8h(ptr %A) nounwind sanitize_memory {
6341; CHECK-LABEL: define <8 x i16> @sshll2_8h(
6342; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6343; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6344; CHECK-NEXT:    call void @llvm.donothing()
6345; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6346; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6347; CHECK:       2:
6348; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6349; CHECK-NEXT:    unreachable
6350; CHECK:       3:
6351; CHECK-NEXT:    [[LOAD1:%.*]] = load <16 x i8>, ptr [[A]], align 16
6352; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6353; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6354; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6355; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
6356; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1), <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6357; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6358; CHECK-NEXT:    [[_MSPROP1:%.*]] = sext <8 x i8> [[_MSPROP]] to <8 x i16>
6359; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
6360; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], splat (i16 1)
6361; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
6362; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1)
6363; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
6364; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
6365;
6366  %load1 = load <16 x i8>, ptr %A
6367  %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
6368  %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
6369  %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
6370  ret <8 x i16> %tmp3
6371}
6372
6373define <4 x i32> @sshll2_4s(ptr %A) nounwind sanitize_memory {
6374; CHECK-LABEL: define <4 x i32> @sshll2_4s(
6375; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6376; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6377; CHECK-NEXT:    call void @llvm.donothing()
6378; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6379; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6380; CHECK:       2:
6381; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6382; CHECK-NEXT:    unreachable
6383; CHECK:       3:
6384; CHECK-NEXT:    [[LOAD1:%.*]] = load <8 x i16>, ptr [[A]], align 16
6385; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6386; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6387; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6388; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
6389; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6390; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6391; CHECK-NEXT:    [[_MSPROP1:%.*]] = sext <4 x i16> [[_MSPROP]] to <4 x i32>
6392; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
6393; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 1)
6394; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
6395; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1)
6396; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
6397; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
6398;
6399  %load1 = load <8 x i16>, ptr %A
6400  %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
6401  %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
6402  %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
6403  ret <4 x i32> %tmp3
6404}
6405
6406define <2 x i64> @sshll2_2d(ptr %A) nounwind sanitize_memory {
6407; CHECK-LABEL: define <2 x i64> @sshll2_2d(
6408; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6409; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6410; CHECK-NEXT:    call void @llvm.donothing()
6411; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6412; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6413; CHECK:       2:
6414; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6415; CHECK-NEXT:    unreachable
6416; CHECK:       3:
6417; CHECK-NEXT:    [[LOAD1:%.*]] = load <4 x i32>, ptr [[A]], align 16
6418; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6419; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6420; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6421; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
6422; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3>
6423; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
6424; CHECK-NEXT:    [[_MSPROP1:%.*]] = sext <2 x i32> [[_MSPROP]] to <2 x i64>
6425; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
6426; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], splat (i64 1)
6427; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
6428; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
6429; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
6430; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
6431;
6432  %load1 = load <4 x i32>, ptr %A
6433  %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
6434  %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
6435  %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
6436  ret <2 x i64> %tmp3
6437}
6438
6439define <8 x i8> @sqshli8b(ptr %A) nounwind sanitize_memory {
6440; CHECK-LABEL: define <8 x i8> @sqshli8b(
6441; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6442; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6443; CHECK-NEXT:    call void @llvm.donothing()
6444; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6445; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6446; CHECK:       2:
6447; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6448; CHECK-NEXT:    unreachable
6449; CHECK:       3:
6450; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
6451; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6452; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6453; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6454; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
6455; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1))
6456; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
6457; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1))
6458; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6459; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
6460;
6461  %tmp1 = load <8 x i8>, ptr %A
6462  %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
6463  ret <8 x i8> %tmp3
6464}
6465
6466define <4 x i16> @sqshli4h(ptr %A) nounwind sanitize_memory {
6467; CHECK-LABEL: define <4 x i16> @sqshli4h(
6468; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6469; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6470; CHECK-NEXT:    call void @llvm.donothing()
6471; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6472; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6473; CHECK:       2:
6474; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6475; CHECK-NEXT:    unreachable
6476; CHECK:       3:
6477; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
6478; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6479; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6480; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6481; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
6482; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1))
6483; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
6484; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1))
6485; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6486; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
6487;
6488  %tmp1 = load <4 x i16>, ptr %A
6489  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
6490  ret <4 x i16> %tmp3
6491}
6492
6493define <2 x i32> @sqshli2s(ptr %A) nounwind sanitize_memory {
6494; CHECK-LABEL: define <2 x i32> @sqshli2s(
6495; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6496; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6497; CHECK-NEXT:    call void @llvm.donothing()
6498; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6499; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6500; CHECK:       2:
6501; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6502; CHECK-NEXT:    unreachable
6503; CHECK:       3:
6504; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
6505; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6506; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6507; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6508; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
6509; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1))
6510; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
6511; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
6512; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6513; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
6514;
6515  %tmp1 = load <2 x i32>, ptr %A
6516  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
6517  ret <2 x i32> %tmp3
6518}
6519
6520define <16 x i8> @sqshli16b(ptr %A) nounwind sanitize_memory {
6521; CHECK-LABEL: define <16 x i8> @sqshli16b(
6522; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6523; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6524; CHECK-NEXT:    call void @llvm.donothing()
6525; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6526; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6527; CHECK:       2:
6528; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6529; CHECK-NEXT:    unreachable
6530; CHECK:       3:
6531; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
6532; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6533; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6534; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6535; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
6536; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1))
6537; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
6538; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1))
6539; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6540; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
6541;
6542  %tmp1 = load <16 x i8>, ptr %A
6543  %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
6544  ret <16 x i8> %tmp3
6545}
6546
6547define <8 x i16> @sqshli8h(ptr %A) nounwind sanitize_memory {
6548; CHECK-LABEL: define <8 x i16> @sqshli8h(
6549; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6550; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6551; CHECK-NEXT:    call void @llvm.donothing()
6552; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6553; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6554; CHECK:       2:
6555; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6556; CHECK-NEXT:    unreachable
6557; CHECK:       3:
6558; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
6559; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6560; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6561; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6562; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
6563; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1))
6564; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
6565; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1))
6566; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6567; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
6568;
6569  %tmp1 = load <8 x i16>, ptr %A
6570  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
6571  ret <8 x i16> %tmp3
6572}
6573
6574define <4 x i32> @sqshli4s(ptr %A) nounwind sanitize_memory {
6575; CHECK-LABEL: define <4 x i32> @sqshli4s(
6576; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6577; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6578; CHECK-NEXT:    call void @llvm.donothing()
6579; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6580; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6581; CHECK:       2:
6582; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6583; CHECK-NEXT:    unreachable
6584; CHECK:       3:
6585; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
6586; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6587; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6588; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6589; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
6590; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1))
6591; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
6592; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1))
6593; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6594; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
6595;
6596  %tmp1 = load <4 x i32>, ptr %A
6597  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
6598  ret <4 x i32> %tmp3
6599}
6600
6601define <2 x i64> @sqshli2d(ptr %A) nounwind sanitize_memory {
6602; CHECK-LABEL: define <2 x i64> @sqshli2d(
6603; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6604; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6605; CHECK-NEXT:    call void @llvm.donothing()
6606; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6607; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6608; CHECK:       2:
6609; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6610; CHECK-NEXT:    unreachable
6611; CHECK:       3:
6612; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
6613; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6614; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6615; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6616; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
6617; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1))
6618; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
6619; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1))
6620; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6621; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
6622;
6623  %tmp1 = load <2 x i64>, ptr %A
6624  %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
6625  ret <2 x i64> %tmp3
6626}
6627
6628define <8 x i8> @uqshli8b(ptr %A) nounwind sanitize_memory {
6629; CHECK-LABEL: define <8 x i8> @uqshli8b(
6630; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6631; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6632; CHECK-NEXT:    call void @llvm.donothing()
6633; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6634; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6635; CHECK:       2:
6636; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6637; CHECK-NEXT:    unreachable
6638; CHECK:       3:
6639; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
6640; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6641; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6642; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6643; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
6644; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1))
6645; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
6646; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1))
6647; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6648; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
6649;
6650  %tmp1 = load <8 x i8>, ptr %A
6651  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
6652  ret <8 x i8> %tmp3
6653}
6654
6655define <8 x i8> @uqshli8b_1(ptr %A) nounwind sanitize_memory {
6656; CHECK-LABEL: define <8 x i8> @uqshli8b_1(
6657; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6658; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6659; CHECK-NEXT:    call void @llvm.donothing()
6660; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6661; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6662; CHECK:       2:
6663; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6664; CHECK-NEXT:    unreachable
6665; CHECK:       3:
6666; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
6667; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6668; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6669; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6670; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
6671; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 8))
6672; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
6673; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 8))
6674; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6675; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
6676;
6677  %tmp1 = load <8 x i8>, ptr %A
6678  %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
6679  ret <8 x i8> %tmp3
6680}
6681
6682define <4 x i16> @uqshli4h(ptr %A) nounwind sanitize_memory {
6683; CHECK-LABEL: define <4 x i16> @uqshli4h(
6684; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6685; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6686; CHECK-NEXT:    call void @llvm.donothing()
6687; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6688; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6689; CHECK:       2:
6690; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6691; CHECK-NEXT:    unreachable
6692; CHECK:       3:
6693; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
6694; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6695; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6696; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6697; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
6698; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1))
6699; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
6700; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1))
6701; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6702; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
6703;
6704  %tmp1 = load <4 x i16>, ptr %A
6705  %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
6706  ret <4 x i16> %tmp3
6707}
6708
6709define <2 x i32> @uqshli2s(ptr %A) nounwind sanitize_memory {
6710; CHECK-LABEL: define <2 x i32> @uqshli2s(
6711; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6712; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6713; CHECK-NEXT:    call void @llvm.donothing()
6714; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6715; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6716; CHECK:       2:
6717; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6718; CHECK-NEXT:    unreachable
6719; CHECK:       3:
6720; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
6721; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6722; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6723; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6724; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
6725; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1))
6726; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
6727; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
6728; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6729; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
6730;
6731  %tmp1 = load <2 x i32>, ptr %A
6732  %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
6733  ret <2 x i32> %tmp3
6734}
6735
6736define <16 x i8> @uqshli16b(ptr %A) nounwind sanitize_memory {
6737; CHECK-LABEL: define <16 x i8> @uqshli16b(
6738; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6739; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6740; CHECK-NEXT:    call void @llvm.donothing()
6741; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6742; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6743; CHECK:       2:
6744; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6745; CHECK-NEXT:    unreachable
6746; CHECK:       3:
6747; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
6748; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6749; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6750; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6751; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
6752; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1))
6753; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
6754; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1))
6755; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6756; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
6757;
6758  %tmp1 = load <16 x i8>, ptr %A
6759  %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
6760  ret <16 x i8> %tmp3
6761}
6762
6763define <8 x i16> @uqshli8h(ptr %A) nounwind sanitize_memory {
6764; CHECK-LABEL: define <8 x i16> @uqshli8h(
6765; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6766; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6767; CHECK-NEXT:    call void @llvm.donothing()
6768; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6769; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6770; CHECK:       2:
6771; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6772; CHECK-NEXT:    unreachable
6773; CHECK:       3:
6774; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
6775; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6776; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6777; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6778; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
6779; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1))
6780; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
6781; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1))
6782; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6783; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
6784;
6785  %tmp1 = load <8 x i16>, ptr %A
6786  %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
6787  ret <8 x i16> %tmp3
6788}
6789
6790define <4 x i32> @uqshli4s(ptr %A) nounwind sanitize_memory {
6791; CHECK-LABEL: define <4 x i32> @uqshli4s(
6792; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6793; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6794; CHECK-NEXT:    call void @llvm.donothing()
6795; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6796; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6797; CHECK:       2:
6798; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6799; CHECK-NEXT:    unreachable
6800; CHECK:       3:
6801; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
6802; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6803; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6804; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6805; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
6806; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1))
6807; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
6808; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1))
6809; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6810; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
6811;
6812  %tmp1 = load <4 x i32>, ptr %A
6813  %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
6814  ret <4 x i32> %tmp3
6815}
6816
6817define <2 x i64> @uqshli2d(ptr %A) nounwind sanitize_memory {
6818; CHECK-LABEL: define <2 x i64> @uqshli2d(
6819; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
6820; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6821; CHECK-NEXT:    call void @llvm.donothing()
6822; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6823; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
6824; CHECK:       2:
6825; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6826; CHECK-NEXT:    unreachable
6827; CHECK:       3:
6828; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
6829; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
6830; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
6831; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
6832; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
6833; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1))
6834; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
6835; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1))
6836; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
6837; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
6838;
6839  %tmp1 = load <2 x i64>, ptr %A
6840  %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
6841  ret <2 x i64> %tmp3
6842}
6843
6844define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind sanitize_memory {
6845; CHECK-LABEL: define <8 x i8> @ursra8b(
6846; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
6847; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6848; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
6849; CHECK-NEXT:    call void @llvm.donothing()
6850; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6851; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
6852; CHECK:       3:
6853; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6854; CHECK-NEXT:    unreachable
6855; CHECK:       4:
6856; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
6857; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
6858; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
6859; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
6860; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
6861; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1))
6862; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
6863; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1))
6864; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
6865; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
6866; CHECK:       10:
6867; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6868; CHECK-NEXT:    unreachable
6869; CHECK:       11:
6870; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
6871; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
6872; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
6873; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
6874; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
6875; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], [[_MSLD1]]
6876; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
6877; CHECK-NEXT:    store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
6878; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
6879;
6880  %tmp1 = load <8 x i8>, ptr %A
6881  %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
6882  %tmp4 = load <8 x i8>, ptr %B
6883  %tmp5 = add <8 x i8> %tmp3, %tmp4
6884  ret <8 x i8> %tmp5
6885}
6886
6887define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind sanitize_memory {
6888; CHECK-LABEL: define <4 x i16> @ursra4h(
6889; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
6890; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6891; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
6892; CHECK-NEXT:    call void @llvm.donothing()
6893; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6894; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
6895; CHECK:       3:
6896; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6897; CHECK-NEXT:    unreachable
6898; CHECK:       4:
6899; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
6900; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
6901; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
6902; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
6903; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
6904; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1))
6905; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
6906; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1))
6907; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
6908; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
6909; CHECK:       10:
6910; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6911; CHECK-NEXT:    unreachable
6912; CHECK:       11:
6913; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
6914; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
6915; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
6916; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
6917; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
6918; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], [[_MSLD1]]
6919; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
6920; CHECK-NEXT:    store <4 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
6921; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
6922;
6923  %tmp1 = load <4 x i16>, ptr %A
6924  %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
6925  %tmp4 = load <4 x i16>, ptr %B
6926  %tmp5 = add <4 x i16> %tmp3, %tmp4
6927  ret <4 x i16> %tmp5
6928}
6929
6930define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind sanitize_memory {
6931; CHECK-LABEL: define <2 x i32> @ursra2s(
6932; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
6933; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6934; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
6935; CHECK-NEXT:    call void @llvm.donothing()
6936; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6937; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
6938; CHECK:       3:
6939; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6940; CHECK-NEXT:    unreachable
6941; CHECK:       4:
6942; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
6943; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
6944; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
6945; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
6946; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
6947; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1))
6948; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
6949; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1))
6950; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
6951; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
6952; CHECK:       10:
6953; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6954; CHECK-NEXT:    unreachable
6955; CHECK:       11:
6956; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
6957; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
6958; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
6959; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
6960; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
6961; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], [[_MSLD1]]
6962; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
6963; CHECK-NEXT:    store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
6964; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
6965;
6966  %tmp1 = load <2 x i32>, ptr %A
6967  %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
6968  %tmp4 = load <2 x i32>, ptr %B
6969  %tmp5 = add <2 x i32> %tmp3, %tmp4
6970  ret <2 x i32> %tmp5
6971}
6972
6973define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind sanitize_memory {
6974; CHECK-LABEL: define <16 x i8> @ursra16b(
6975; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
6976; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
6977; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
6978; CHECK-NEXT:    call void @llvm.donothing()
6979; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
6980; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
6981; CHECK:       3:
6982; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6983; CHECK-NEXT:    unreachable
6984; CHECK:       4:
6985; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
6986; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
6987; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
6988; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
6989; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
6990; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1))
6991; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
6992; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1))
6993; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
6994; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
6995; CHECK:       10:
6996; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
6997; CHECK-NEXT:    unreachable
6998; CHECK:       11:
6999; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
7000; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7001; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7002; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7003; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
7004; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP]], [[_MSLD1]]
7005; CHECK-NEXT:    [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
7006; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7007; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
7008;
7009  %tmp1 = load <16 x i8>, ptr %A
7010  %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
7011  %tmp4 = load <16 x i8>, ptr %B
7012  %tmp5 = add <16 x i8> %tmp3, %tmp4
7013  ret <16 x i8> %tmp5
7014}
7015
7016define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind sanitize_memory {
7017; CHECK-LABEL: define <8 x i16> @ursra8h(
7018; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7019; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7020; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7021; CHECK-NEXT:    call void @llvm.donothing()
7022; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7023; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7024; CHECK:       3:
7025; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7026; CHECK-NEXT:    unreachable
7027; CHECK:       4:
7028; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
7029; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7030; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7031; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7032; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
7033; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1))
7034; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
7035; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1))
7036; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7037; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7038; CHECK:       10:
7039; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7040; CHECK-NEXT:    unreachable
7041; CHECK:       11:
7042; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
7043; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7044; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7045; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7046; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
7047; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSLD1]]
7048; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
7049; CHECK-NEXT:    store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7050; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
7051;
7052  %tmp1 = load <8 x i16>, ptr %A
7053  %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
7054  %tmp4 = load <8 x i16>, ptr %B
7055  %tmp5 = add <8 x i16> %tmp3, %tmp4
7056  ret <8 x i16> %tmp5
7057}
7058
7059define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind sanitize_memory {
7060; CHECK-LABEL: define <4 x i32> @ursra4s(
7061; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7062; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7063; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7064; CHECK-NEXT:    call void @llvm.donothing()
7065; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7066; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7067; CHECK:       3:
7068; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7069; CHECK-NEXT:    unreachable
7070; CHECK:       4:
7071; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
7072; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7073; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7074; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7075; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
7076; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1))
7077; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
7078; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1))
7079; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7080; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7081; CHECK:       10:
7082; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7083; CHECK-NEXT:    unreachable
7084; CHECK:       11:
7085; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
7086; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7087; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7088; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7089; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
7090; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSLD1]]
7091; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
7092; CHECK-NEXT:    store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7093; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
7094;
7095  %tmp1 = load <4 x i32>, ptr %A
7096  %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
7097  %tmp4 = load <4 x i32>, ptr %B
7098  %tmp5 = add <4 x i32> %tmp3, %tmp4
7099  ret <4 x i32> %tmp5
7100}
7101
7102define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind sanitize_memory {
7103; CHECK-LABEL: define <2 x i64> @ursra2d(
7104; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7105; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7106; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7107; CHECK-NEXT:    call void @llvm.donothing()
7108; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7109; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7110; CHECK:       3:
7111; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7112; CHECK-NEXT:    unreachable
7113; CHECK:       4:
7114; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
7115; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7116; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7117; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7118; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
7119; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1))
7120; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
7121; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1))
7122; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7123; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7124; CHECK:       10:
7125; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7126; CHECK-NEXT:    unreachable
7127; CHECK:       11:
7128; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
7129; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7130; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7131; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7132; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
7133; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSLD1]]
7134; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
7135; CHECK-NEXT:    store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7136; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
7137;
7138  %tmp1 = load <2 x i64>, ptr %A
7139  %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
7140  %tmp4 = load <2 x i64>, ptr %B
7141  %tmp5 = add <2 x i64> %tmp3, %tmp4
7142  ret <2 x i64> %tmp5
7143}
7144
7145define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind sanitize_memory {
7146; CHECK-LABEL: define <1 x i64> @ursra1d(
7147; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7148; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7149; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7150; CHECK-NEXT:    call void @llvm.donothing()
7151; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7152; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7153; CHECK:       3:
7154; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7155; CHECK-NEXT:    unreachable
7156; CHECK:       4:
7157; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
7158; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7159; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7160; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7161; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
7162; CHECK-NEXT:    [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1))
7163; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer
7164; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1))
7165; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7166; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7167; CHECK:       10:
7168; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7169; CHECK-NEXT:    unreachable
7170; CHECK:       11:
7171; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8
7172; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7173; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7174; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7175; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
7176; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <1 x i64> [[_MSPROP]], [[_MSLD1]]
7177; CHECK-NEXT:    [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]]
7178; CHECK-NEXT:    store <1 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7179; CHECK-NEXT:    ret <1 x i64> [[TMP5]]
7180;
7181  %tmp1 = load <1 x i64>, ptr %A
7182  %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
7183  %tmp4 = load <1 x i64>, ptr %B
7184  %tmp5 = add <1 x i64> %tmp3, %tmp4
7185  ret <1 x i64> %tmp5
7186}
7187
7188define i64 @ursra_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
7189; CHECK-LABEL: define i64 @ursra_scalar(
7190; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7191; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7192; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7193; CHECK-NEXT:    call void @llvm.donothing()
7194; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7195; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7196; CHECK:       3:
7197; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7198; CHECK-NEXT:    unreachable
7199; CHECK:       4:
7200; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
7201; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7202; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7203; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7204; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
7205; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[_MSLD]], i64 -1)
7206; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP8]], 0
7207; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 -1)
7208; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7209; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7210; CHECK:       10:
7211; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7212; CHECK-NEXT:    unreachable
7213; CHECK:       11:
7214; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[B]], align 8
7215; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7216; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7217; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7218; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
7219; CHECK-NEXT:    [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSLD1]]
7220; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]]
7221; CHECK-NEXT:    store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7222; CHECK-NEXT:    ret i64 [[TMP5]]
7223;
7224  %tmp1 = load i64, ptr %A
7225  %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
7226  %tmp4 = load i64, ptr %B
7227  %tmp5 = add i64 %tmp3, %tmp4
7228  ret i64 %tmp5
7229}
7230
7231define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind sanitize_memory {
7232; CHECK-LABEL: define <8 x i8> @srsra8b(
7233; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7234; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7235; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7236; CHECK-NEXT:    call void @llvm.donothing()
7237; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7238; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7239; CHECK:       3:
7240; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7241; CHECK-NEXT:    unreachable
7242; CHECK:       4:
7243; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
7244; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7245; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7246; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7247; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
7248; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1))
7249; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
7250; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1))
7251; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7252; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7253; CHECK:       10:
7254; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7255; CHECK-NEXT:    unreachable
7256; CHECK:       11:
7257; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
7258; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7259; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7260; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7261; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
7262; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], [[_MSLD1]]
7263; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
7264; CHECK-NEXT:    store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7265; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
7266;
7267  %tmp1 = load <8 x i8>, ptr %A
7268  %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
7269  %tmp4 = load <8 x i8>, ptr %B
7270  %tmp5 = add <8 x i8> %tmp3, %tmp4
7271  ret <8 x i8> %tmp5
7272}
7273
7274define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind sanitize_memory {
7275; CHECK-LABEL: define <4 x i16> @srsra4h(
7276; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7277; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7278; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7279; CHECK-NEXT:    call void @llvm.donothing()
7280; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7281; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7282; CHECK:       3:
7283; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7284; CHECK-NEXT:    unreachable
7285; CHECK:       4:
7286; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
7287; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7288; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7289; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7290; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
7291; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1))
7292; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
7293; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1))
7294; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7295; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7296; CHECK:       10:
7297; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7298; CHECK-NEXT:    unreachable
7299; CHECK:       11:
7300; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
7301; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7302; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7303; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7304; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
7305; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], [[_MSLD1]]
7306; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
7307; CHECK-NEXT:    store <4 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7308; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
7309;
7310  %tmp1 = load <4 x i16>, ptr %A
7311  %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
7312  %tmp4 = load <4 x i16>, ptr %B
7313  %tmp5 = add <4 x i16> %tmp3, %tmp4
7314  ret <4 x i16> %tmp5
7315}
7316
7317define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind sanitize_memory {
7318; CHECK-LABEL: define <2 x i32> @srsra2s(
7319; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7320; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7321; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7322; CHECK-NEXT:    call void @llvm.donothing()
7323; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7324; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7325; CHECK:       3:
7326; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7327; CHECK-NEXT:    unreachable
7328; CHECK:       4:
7329; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
7330; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7331; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7332; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7333; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
7334; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1))
7335; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
7336; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1))
7337; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7338; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7339; CHECK:       10:
7340; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7341; CHECK-NEXT:    unreachable
7342; CHECK:       11:
7343; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
7344; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7345; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7346; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7347; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
7348; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], [[_MSLD1]]
7349; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
7350; CHECK-NEXT:    store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7351; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
7352;
7353  %tmp1 = load <2 x i32>, ptr %A
7354  %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
7355  %tmp4 = load <2 x i32>, ptr %B
7356  %tmp5 = add <2 x i32> %tmp3, %tmp4
7357  ret <2 x i32> %tmp5
7358}
7359
7360define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind sanitize_memory {
7361; CHECK-LABEL: define <16 x i8> @srsra16b(
7362; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7363; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7364; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7365; CHECK-NEXT:    call void @llvm.donothing()
7366; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7367; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7368; CHECK:       3:
7369; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7370; CHECK-NEXT:    unreachable
7371; CHECK:       4:
7372; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
7373; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7374; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7375; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7376; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
7377; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1))
7378; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
7379; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1))
7380; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7381; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7382; CHECK:       10:
7383; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7384; CHECK-NEXT:    unreachable
7385; CHECK:       11:
7386; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
7387; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7388; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7389; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7390; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
7391; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP]], [[_MSLD1]]
7392; CHECK-NEXT:    [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
7393; CHECK-NEXT:    store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7394; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
7395;
7396  %tmp1 = load <16 x i8>, ptr %A
7397  %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
7398  %tmp4 = load <16 x i8>, ptr %B
7399  %tmp5 = add <16 x i8> %tmp3, %tmp4
7400  ret <16 x i8> %tmp5
7401}
7402
7403define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind sanitize_memory {
7404; CHECK-LABEL: define <8 x i16> @srsra8h(
7405; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7406; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7407; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7408; CHECK-NEXT:    call void @llvm.donothing()
7409; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7410; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7411; CHECK:       3:
7412; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7413; CHECK-NEXT:    unreachable
7414; CHECK:       4:
7415; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
7416; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7417; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7418; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7419; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
7420; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1))
7421; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
7422; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1))
7423; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7424; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7425; CHECK:       10:
7426; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7427; CHECK-NEXT:    unreachable
7428; CHECK:       11:
7429; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
7430; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7431; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7432; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7433; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
7434; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSLD1]]
7435; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
7436; CHECK-NEXT:    store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7437; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
7438;
7439  %tmp1 = load <8 x i16>, ptr %A
7440  %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
7441  %tmp4 = load <8 x i16>, ptr %B
7442  %tmp5 = add <8 x i16> %tmp3, %tmp4
7443  ret <8 x i16> %tmp5
7444}
7445
7446define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind sanitize_memory {
7447; CHECK-LABEL: define <4 x i32> @srsra4s(
7448; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7449; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7450; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7451; CHECK-NEXT:    call void @llvm.donothing()
7452; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7453; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7454; CHECK:       3:
7455; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7456; CHECK-NEXT:    unreachable
7457; CHECK:       4:
7458; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
7459; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7460; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7461; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7462; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
7463; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1))
7464; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
7465; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1))
7466; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7467; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7468; CHECK:       10:
7469; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7470; CHECK-NEXT:    unreachable
7471; CHECK:       11:
7472; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
7473; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7474; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7475; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7476; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
7477; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSLD1]]
7478; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
7479; CHECK-NEXT:    store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7480; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
7481;
7482  %tmp1 = load <4 x i32>, ptr %A
7483  %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
7484  %tmp4 = load <4 x i32>, ptr %B
7485  %tmp5 = add <4 x i32> %tmp3, %tmp4
7486  ret <4 x i32> %tmp5
7487}
7488
7489define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind sanitize_memory {
7490; CHECK-LABEL: define <2 x i64> @srsra2d(
7491; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7492; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7493; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7494; CHECK-NEXT:    call void @llvm.donothing()
7495; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7496; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7497; CHECK:       3:
7498; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7499; CHECK-NEXT:    unreachable
7500; CHECK:       4:
7501; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
7502; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7503; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7504; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7505; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
7506; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1))
7507; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
7508; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1))
7509; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7510; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7511; CHECK:       10:
7512; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7513; CHECK-NEXT:    unreachable
7514; CHECK:       11:
7515; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
7516; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7517; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7518; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7519; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
7520; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSLD1]]
7521; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
7522; CHECK-NEXT:    store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7523; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
7524;
7525  %tmp1 = load <2 x i64>, ptr %A
7526  %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
7527  %tmp4 = load <2 x i64>, ptr %B
7528  %tmp5 = add <2 x i64> %tmp3, %tmp4
7529  ret <2 x i64> %tmp5
7530}
7531
7532define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind sanitize_memory {
7533; CHECK-LABEL: define <1 x i64> @srsra1d(
7534; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7535; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7536; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7537; CHECK-NEXT:    call void @llvm.donothing()
7538; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7539; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7540; CHECK:       3:
7541; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7542; CHECK-NEXT:    unreachable
7543; CHECK:       4:
7544; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
7545; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7546; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7547; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7548; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
7549; CHECK-NEXT:    [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1))
7550; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer
7551; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1))
7552; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7553; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7554; CHECK:       10:
7555; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7556; CHECK-NEXT:    unreachable
7557; CHECK:       11:
7558; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8
7559; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7560; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7561; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7562; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
7563; CHECK-NEXT:    [[_MSPROP2:%.*]] = or <1 x i64> [[_MSPROP]], [[_MSLD1]]
7564; CHECK-NEXT:    [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]]
7565; CHECK-NEXT:    store <1 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7566; CHECK-NEXT:    ret <1 x i64> [[TMP5]]
7567;
7568  %tmp1 = load <1 x i64>, ptr %A
7569  %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
7570  %tmp4 = load <1 x i64>, ptr %B
7571  %tmp5 = add <1 x i64> %tmp3, %tmp4
7572  ret <1 x i64> %tmp5
7573}
7574
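; Scalar form of the rounding-shift-accumulate pattern above: the same srshl-by-(-1)-then-add
; sequence, but on a plain i64 via @llvm.aarch64.neon.srshl.i64. The shadow of the value loaded
; from %A is pushed through the same intrinsic call and then OR-ed with the shadow of the addend
; loaded from %B before being stored to __msan_retval_tls.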
7575define i64 @srsra_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
7576; CHECK-LABEL: define i64 @srsra_scalar(
7577; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7578; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7579; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7580; CHECK-NEXT:    call void @llvm.donothing()
7581; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7582; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7583; CHECK:       3:
7584; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7585; CHECK-NEXT:    unreachable
7586; CHECK:       4:
7587; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A]], align 8
7588; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7589; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7590; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7591; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
7592; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[_MSLD]], i64 -1)
7593; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP8]], 0
7594; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 -1)
7595; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
7596; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
7597; CHECK:       10:
7598; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7599; CHECK-NEXT:    unreachable
7600; CHECK:       11:
7601; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[B]], align 8
7602; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
7603; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
7604; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
7605; CHECK-NEXT:    [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
7606; CHECK-NEXT:    [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSLD1]]
7607; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]]
7608; CHECK-NEXT:    store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
7609; CHECK-NEXT:    ret i64 [[TMP5]]
7610;
7611  %tmp1 = load i64, ptr %A
7612  %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
7613  %tmp4 = load i64, ptr %B
7614  %tmp5 = add i64 %tmp3, %tmp4
7615  ret i64 %tmp5
7616}
7617
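; usra* tests: unsigned shift-right-accumulate written as an lshr by a constant followed by an add.
; The instrumentation applies the same lshr to the shadow of the shifted operand, ORs in the shadow
; of the value loaded from %B, and stores the combined shadow to __msan_retval_tls.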
7618define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind sanitize_memory {
7619; CHECK-LABEL: define <8 x i8> @usra8b(
7620; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7621; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7622; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7623; CHECK-NEXT:    call void @llvm.donothing()
7624; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7625; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7626; CHECK:       3:
7627; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7628; CHECK-NEXT:    unreachable
7629; CHECK:       4:
7630; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
7631; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7632; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7633; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7634; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
7635; CHECK-NEXT:    [[TMP8:%.*]] = lshr <8 x i8> [[_MSLD]], splat (i8 1)
7636; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
7637; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], splat (i8 1)
7638; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
7639; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7640; CHECK:       10:
7641; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7642; CHECK-NEXT:    unreachable
7643; CHECK:       11:
7644; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
7645; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
7646; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
7647; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
7648; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP14]], align 8
7649; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP9]], [[_MSLD1]]
7650; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
7651; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
7652; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
7653;
7654  %tmp1 = load <8 x i8>, ptr %A
7655  %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
7656  %tmp4 = load <8 x i8>, ptr %B
7657  %tmp5 = add <8 x i8> %tmp3, %tmp4
7658  ret <8 x i8> %tmp5
7659}
7660
7661define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind sanitize_memory {
7662; CHECK-LABEL: define <4 x i16> @usra4h(
7663; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7664; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7665; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7666; CHECK-NEXT:    call void @llvm.donothing()
7667; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7668; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7669; CHECK:       3:
7670; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7671; CHECK-NEXT:    unreachable
7672; CHECK:       4:
7673; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
7674; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7675; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7676; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7677; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
7678; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i16> [[_MSLD]], splat (i16 1)
7679; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
7680; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1)
7681; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
7682; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7683; CHECK:       10:
7684; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7685; CHECK-NEXT:    unreachable
7686; CHECK:       11:
7687; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
7688; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
7689; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
7690; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
7691; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 8
7692; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP9]], [[_MSLD1]]
7693; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
7694; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
7695; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
7696;
7697  %tmp1 = load <4 x i16>, ptr %A
7698  %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
7699  %tmp4 = load <4 x i16>, ptr %B
7700  %tmp5 = add <4 x i16> %tmp3, %tmp4
7701  ret <4 x i16> %tmp5
7702}
7703
7704define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind sanitize_memory {
7705; CHECK-LABEL: define <2 x i32> @usra2s(
7706; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7707; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7708; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7709; CHECK-NEXT:    call void @llvm.donothing()
7710; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7711; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7712; CHECK:       3:
7713; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7714; CHECK-NEXT:    unreachable
7715; CHECK:       4:
7716; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
7717; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7718; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7719; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7720; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
7721; CHECK-NEXT:    [[TMP8:%.*]] = lshr <2 x i32> [[_MSLD]], splat (i32 1)
7722; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
7723; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1)
7724; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
7725; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7726; CHECK:       10:
7727; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7728; CHECK-NEXT:    unreachable
7729; CHECK:       11:
7730; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
7731; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
7732; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
7733; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
7734; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP14]], align 8
7735; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP9]], [[_MSLD1]]
7736; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
7737; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
7738; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
7739;
7740  %tmp1 = load <2 x i32>, ptr %A
7741  %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
7742  %tmp4 = load <2 x i32>, ptr %B
7743  %tmp5 = add <2 x i32> %tmp3, %tmp4
7744  ret <2 x i32> %tmp5
7745}
7746
7747define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind sanitize_memory {
7748; CHECK-LABEL: define <16 x i8> @usra16b(
7749; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7750; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7751; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7752; CHECK-NEXT:    call void @llvm.donothing()
7753; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7754; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7755; CHECK:       3:
7756; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7757; CHECK-NEXT:    unreachable
7758; CHECK:       4:
7759; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
7760; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7761; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7762; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7763; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
7764; CHECK-NEXT:    [[TMP8:%.*]] = lshr <16 x i8> [[_MSLD]], splat (i8 1)
7765; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
7766; CHECK-NEXT:    [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], splat (i8 1)
7767; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
7768; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7769; CHECK:       10:
7770; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7771; CHECK-NEXT:    unreachable
7772; CHECK:       11:
7773; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
7774; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
7775; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
7776; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
7777; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP14]], align 16
7778; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP9]], [[_MSLD1]]
7779; CHECK-NEXT:    [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
7780; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
7781; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
7782;
7783  %tmp1 = load <16 x i8>, ptr %A
7784  %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
7785  %tmp4 = load <16 x i8>, ptr %B
7786  %tmp5 = add <16 x i8> %tmp3, %tmp4
7787  ret <16 x i8> %tmp5
7788}
7789
7790define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind sanitize_memory {
7791; CHECK-LABEL: define <8 x i16> @usra8h(
7792; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7793; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7794; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7795; CHECK-NEXT:    call void @llvm.donothing()
7796; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7797; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7798; CHECK:       3:
7799; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7800; CHECK-NEXT:    unreachable
7801; CHECK:       4:
7802; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
7803; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7804; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7805; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7806; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
7807; CHECK-NEXT:    [[TMP8:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1)
7808; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
7809; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
7810; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
7811; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7812; CHECK:       10:
7813; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7814; CHECK-NEXT:    unreachable
7815; CHECK:       11:
7816; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
7817; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
7818; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
7819; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
7820; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP14]], align 16
7821; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP9]], [[_MSLD1]]
7822; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
7823; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
7824; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
7825;
7826  %tmp1 = load <8 x i16>, ptr %A
7827  %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
7828  %tmp4 = load <8 x i16>, ptr %B
7829  %tmp5 = add <8 x i16> %tmp3, %tmp4
7830  ret <8 x i16> %tmp5
7831}
7832
7833define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind sanitize_memory {
7834; CHECK-LABEL: define <4 x i32> @usra4s(
7835; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7836; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7837; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7838; CHECK-NEXT:    call void @llvm.donothing()
7839; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7840; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7841; CHECK:       3:
7842; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7843; CHECK-NEXT:    unreachable
7844; CHECK:       4:
7845; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
7846; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7847; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7848; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7849; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
7850; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1)
7851; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
7852; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
7853; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
7854; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7855; CHECK:       10:
7856; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7857; CHECK-NEXT:    unreachable
7858; CHECK:       11:
7859; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
7860; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
7861; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
7862; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
7863; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16
7864; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP9]], [[_MSLD1]]
7865; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
7866; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
7867; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
7868;
7869  %tmp1 = load <4 x i32>, ptr %A
7870  %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
7871  %tmp4 = load <4 x i32>, ptr %B
7872  %tmp5 = add <4 x i32> %tmp3, %tmp4
7873  ret <4 x i32> %tmp5
7874}
7875
7876define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind sanitize_memory {
7877; CHECK-LABEL: define <2 x i64> @usra2d(
7878; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7879; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7880; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7881; CHECK-NEXT:    call void @llvm.donothing()
7882; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7883; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7884; CHECK:       3:
7885; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7886; CHECK-NEXT:    unreachable
7887; CHECK:       4:
7888; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
7889; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7890; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7891; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7892; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
7893; CHECK-NEXT:    [[TMP8:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1)
7894; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
7895; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
7896; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
7897; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7898; CHECK:       10:
7899; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7900; CHECK-NEXT:    unreachable
7901; CHECK:       11:
7902; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
7903; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
7904; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
7905; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
7906; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP14]], align 16
7907; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP9]], [[_MSLD1]]
7908; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
7909; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
7910; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
7911;
7912  %tmp1 = load <2 x i64>, ptr %A
7913  %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
7914  %tmp4 = load <2 x i64>, ptr %B
7915  %tmp5 = add <2 x i64> %tmp3, %tmp4
7916  ret <2 x i64> %tmp5
7917}
7918
7919define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind sanitize_memory {
7920; CHECK-LABEL: define <1 x i64> @usra1d(
7921; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7922; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7923; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7924; CHECK-NEXT:    call void @llvm.donothing()
7925; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7926; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7927; CHECK:       3:
7928; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7929; CHECK-NEXT:    unreachable
7930; CHECK:       4:
7931; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
7932; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7933; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7934; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7935; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
7936; CHECK-NEXT:    [[TMP8:%.*]] = lshr <1 x i64> [[_MSLD]], splat (i64 1)
7937; CHECK-NEXT:    [[TMP9:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer
7938; CHECK-NEXT:    [[TMP3:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1)
7939; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
7940; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7941; CHECK:       10:
7942; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7943; CHECK-NEXT:    unreachable
7944; CHECK:       11:
7945; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8
7946; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
7947; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
7948; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
7949; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP14]], align 8
7950; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP9]], [[_MSLD1]]
7951; CHECK-NEXT:    [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]]
7952; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
7953; CHECK-NEXT:    ret <1 x i64> [[TMP5]]
7954;
7955  %tmp1 = load <1 x i64>, ptr %A
7956  %tmp3 = lshr <1 x i64> %tmp1, <i64 1>
7957  %tmp4 = load <1 x i64>, ptr %B
7958  %tmp5 = add <1 x i64> %tmp3, %tmp4
7959  ret <1 x i64> %tmp5
7960}
7961
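; ssra* tests: the signed counterpart, an ashr by a constant followed by an add; shadow propagation
; mirrors the usra* tests, with ashr applied to the shadow of the shifted operand.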
7962define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind sanitize_memory {
7963; CHECK-LABEL: define <8 x i8> @ssra8b(
7964; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
7965; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
7966; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
7967; CHECK-NEXT:    call void @llvm.donothing()
7968; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
7969; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
7970; CHECK:       3:
7971; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7972; CHECK-NEXT:    unreachable
7973; CHECK:       4:
7974; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
7975; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
7976; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
7977; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
7978; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
7979; CHECK-NEXT:    [[TMP8:%.*]] = ashr <8 x i8> [[_MSLD]], splat (i8 1)
7980; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
7981; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i8> [[TMP1]], splat (i8 1)
7982; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
7983; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
7984; CHECK:       10:
7985; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
7986; CHECK-NEXT:    unreachable
7987; CHECK:       11:
7988; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
7989; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
7990; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
7991; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
7992; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP14]], align 8
7993; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP9]], [[_MSLD1]]
7994; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
7995; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
7996; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
7997;
7998  %tmp1 = load <8 x i8>, ptr %A
7999  %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
8000  %tmp4 = load <8 x i8>, ptr %B
8001  %tmp5 = add <8 x i8> %tmp3, %tmp4
8002  ret <8 x i8> %tmp5
8003}
8004
8005define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind sanitize_memory {
8006; CHECK-LABEL: define <4 x i16> @ssra4h(
8007; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8008; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8009; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8010; CHECK-NEXT:    call void @llvm.donothing()
8011; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8012; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8013; CHECK:       3:
8014; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8015; CHECK-NEXT:    unreachable
8016; CHECK:       4:
8017; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
8018; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8019; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8020; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8021; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
8022; CHECK-NEXT:    [[TMP8:%.*]] = ashr <4 x i16> [[_MSLD]], splat (i16 1)
8023; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
8024; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 1)
8025; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8026; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
8027; CHECK:       10:
8028; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8029; CHECK-NEXT:    unreachable
8030; CHECK:       11:
8031; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
8032; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
8033; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
8034; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
8035; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 8
8036; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP9]], [[_MSLD1]]
8037; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
8038; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
8039; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
8040;
8041  %tmp1 = load <4 x i16>, ptr %A
8042  %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
8043  %tmp4 = load <4 x i16>, ptr %B
8044  %tmp5 = add <4 x i16> %tmp3, %tmp4
8045  ret <4 x i16> %tmp5
8046}
8047
8048define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind sanitize_memory {
8049; CHECK-LABEL: define <2 x i32> @ssra2s(
8050; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8051; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8052; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8053; CHECK-NEXT:    call void @llvm.donothing()
8054; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8055; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8056; CHECK:       3:
8057; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8058; CHECK-NEXT:    unreachable
8059; CHECK:       4:
8060; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
8061; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8062; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8063; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8064; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
8065; CHECK-NEXT:    [[TMP8:%.*]] = ashr <2 x i32> [[_MSLD]], splat (i32 1)
8066; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
8067; CHECK-NEXT:    [[TMP3:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 1)
8068; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8069; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
8070; CHECK:       10:
8071; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8072; CHECK-NEXT:    unreachable
8073; CHECK:       11:
8074; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
8075; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
8076; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
8077; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
8078; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP14]], align 8
8079; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP9]], [[_MSLD1]]
8080; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
8081; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
8082; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
8083;
8084  %tmp1 = load <2 x i32>, ptr %A
8085  %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
8086  %tmp4 = load <2 x i32>, ptr %B
8087  %tmp5 = add <2 x i32> %tmp3, %tmp4
8088  ret <2 x i32> %tmp5
8089}
8090
8091define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind sanitize_memory {
8092; CHECK-LABEL: define <16 x i8> @ssra16b(
8093; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8094; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8095; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8096; CHECK-NEXT:    call void @llvm.donothing()
8097; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8098; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8099; CHECK:       3:
8100; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8101; CHECK-NEXT:    unreachable
8102; CHECK:       4:
8103; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
8104; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8105; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8106; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8107; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
8108; CHECK-NEXT:    [[TMP8:%.*]] = ashr <16 x i8> [[_MSLD]], splat (i8 1)
8109; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
8110; CHECK-NEXT:    [[TMP3:%.*]] = ashr <16 x i8> [[TMP1]], splat (i8 1)
8111; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8112; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
8113; CHECK:       10:
8114; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8115; CHECK-NEXT:    unreachable
8116; CHECK:       11:
8117; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
8118; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
8119; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
8120; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
8121; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP14]], align 16
8122; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP9]], [[_MSLD1]]
8123; CHECK-NEXT:    [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
8124; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
8125; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
8126;
8127  %tmp1 = load <16 x i8>, ptr %A
8128  %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
8129  %tmp4 = load <16 x i8>, ptr %B
8130  %tmp5 = add <16 x i8> %tmp3, %tmp4
8131  ret <16 x i8> %tmp5
8132}
8133
8134define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind sanitize_memory {
8135; CHECK-LABEL: define <8 x i16> @ssra8h(
8136; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8137; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8138; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8139; CHECK-NEXT:    call void @llvm.donothing()
8140; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8141; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8142; CHECK:       3:
8143; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8144; CHECK-NEXT:    unreachable
8145; CHECK:       4:
8146; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
8147; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8148; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8149; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8150; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
8151; CHECK-NEXT:    [[TMP8:%.*]] = ashr <8 x i16> [[_MSLD]], splat (i16 1)
8152; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
8153; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1)
8154; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8155; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
8156; CHECK:       10:
8157; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8158; CHECK-NEXT:    unreachable
8159; CHECK:       11:
8160; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
8161; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
8162; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
8163; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
8164; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP14]], align 16
8165; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP9]], [[_MSLD1]]
8166; CHECK-NEXT:    [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
8167; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
8168; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
8169;
8170  %tmp1 = load <8 x i16>, ptr %A
8171  %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
8172  %tmp4 = load <8 x i16>, ptr %B
8173  %tmp5 = add <8 x i16> %tmp3, %tmp4
8174  ret <8 x i16> %tmp5
8175}
8176
8177define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind sanitize_memory {
8178; CHECK-LABEL: define <4 x i32> @ssra4s(
8179; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8180; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8181; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8182; CHECK-NEXT:    call void @llvm.donothing()
8183; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8184; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8185; CHECK:       3:
8186; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8187; CHECK-NEXT:    unreachable
8188; CHECK:       4:
8189; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
8190; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8191; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8192; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8193; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
8194; CHECK-NEXT:    [[TMP8:%.*]] = ashr <4 x i32> [[_MSLD]], splat (i32 1)
8195; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
8196; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1)
8197; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8198; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
8199; CHECK:       10:
8200; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8201; CHECK-NEXT:    unreachable
8202; CHECK:       11:
8203; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
8204; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
8205; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
8206; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
8207; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16
8208; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP9]], [[_MSLD1]]
8209; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
8210; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
8211; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
8212;
8213  %tmp1 = load <4 x i32>, ptr %A
8214  %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
8215  %tmp4 = load <4 x i32>, ptr %B
8216  %tmp5 = add <4 x i32> %tmp3, %tmp4
8217  ret <4 x i32> %tmp5
8218}
8219
8220define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind sanitize_memory {
8221; CHECK-LABEL: define <2 x i64> @ssra2d(
8222; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8223; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8224; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8225; CHECK-NEXT:    call void @llvm.donothing()
8226; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8227; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8228; CHECK:       3:
8229; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8230; CHECK-NEXT:    unreachable
8231; CHECK:       4:
8232; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
8233; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8234; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8235; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8236; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
8237; CHECK-NEXT:    [[TMP8:%.*]] = ashr <2 x i64> [[_MSLD]], splat (i64 1)
8238; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
8239; CHECK-NEXT:    [[TMP3:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1)
8240; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8241; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
8242; CHECK:       10:
8243; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8244; CHECK-NEXT:    unreachable
8245; CHECK:       11:
8246; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
8247; CHECK-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
8248; CHECK-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
8249; CHECK-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
8250; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP14]], align 16
8251; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP9]], [[_MSLD1]]
8252; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
8253; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
8254; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
8255;
8256  %tmp1 = load <2 x i64>, ptr %A
8257  %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
8258  %tmp4 = load <2 x i64>, ptr %B
8259  %tmp5 = add <2 x i64> %tmp3, %tmp4
8260  ret <2 x i64> %tmp5
8261}
8262
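; shr_orr* tests: an lshr by a constant feeding an or instead of an add. The or's shadow follows the
; bitwise rule spelled out in the checks below: (S1 & S2) | (~V1 & S2) | (S1 & ~V2), where S1/V1 are
; the shadow and value of the shifted operand and S2/V2 are those of the operand loaded from %B.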
8263define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory {
8264; CHECK-LABEL: define <8 x i8> @shr_orr8b(
8265; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8266; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8267; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8268; CHECK-NEXT:    call void @llvm.donothing()
8269; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8270; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8271; CHECK:       3:
8272; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8273; CHECK-NEXT:    unreachable
8274; CHECK:       4:
8275; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
8276; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8277; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8278; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8279; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
8280; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8281; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8282; CHECK:       8:
8283; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8284; CHECK-NEXT:    unreachable
8285; CHECK:       9:
8286; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
8287; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8288; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8289; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8290; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
8291; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i8> [[_MSLD]], splat (i8 1)
8292; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
8293; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], splat (i8 1)
8294; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], splat (i8 -1)
8295; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], splat (i8 -1)
8296; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]]
8297; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]]
8298; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]]
8299; CHECK-NEXT:    [[TMP20:%.*]] = or <8 x i8> [[TMP17]], [[TMP18]]
8300; CHECK-NEXT:    [[TMP21:%.*]] = or <8 x i8> [[TMP20]], [[TMP19]]
8301; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]]
8302; CHECK-NEXT:    store <8 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
8303; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
8304;
8305  %tmp1 = load <8 x i8>, ptr %A
8306  %tmp4 = load <8 x i8>, ptr %B
8307  %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
8308  %tmp5 = or <8 x i8> %tmp3, %tmp4
8309  ret <8 x i8> %tmp5
8310}
8311
8312define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory {
8313; CHECK-LABEL: define <4 x i16> @shr_orr4h(
8314; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8315; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8316; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8317; CHECK-NEXT:    call void @llvm.donothing()
8318; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8319; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8320; CHECK:       3:
8321; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8322; CHECK-NEXT:    unreachable
8323; CHECK:       4:
8324; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
8325; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8326; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8327; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8328; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
8329; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8330; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8331; CHECK:       8:
8332; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8333; CHECK-NEXT:    unreachable
8334; CHECK:       9:
8335; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
8336; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8337; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8338; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8339; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
8340; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i16> [[_MSLD]], splat (i16 1)
8341; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
8342; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1)
8343; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], splat (i16 -1)
8344; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], splat (i16 -1)
8345; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]]
8346; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]]
8347; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]]
8348; CHECK-NEXT:    [[TMP20:%.*]] = or <4 x i16> [[TMP17]], [[TMP18]]
8349; CHECK-NEXT:    [[TMP21:%.*]] = or <4 x i16> [[TMP20]], [[TMP19]]
8350; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[TMP4]]
8351; CHECK-NEXT:    store <4 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
8352; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
8353;
8354  %tmp1 = load <4 x i16>, ptr %A
8355  %tmp4 = load <4 x i16>, ptr %B
8356  %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
8357  %tmp5 = or <4 x i16> %tmp3, %tmp4
8358  ret <4 x i16> %tmp5
8359}
8360
8361define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory {
8362; CHECK-LABEL: define <2 x i32> @shr_orr2s(
8363; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8364; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8365; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8366; CHECK-NEXT:    call void @llvm.donothing()
8367; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8368; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8369; CHECK:       3:
8370; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8371; CHECK-NEXT:    unreachable
8372; CHECK:       4:
8373; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
8374; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8375; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8376; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8377; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
8378; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8379; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8380; CHECK:       8:
8381; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8382; CHECK-NEXT:    unreachable
8383; CHECK:       9:
8384; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
8385; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8386; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8387; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8388; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
8389; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i32> [[_MSLD]], splat (i32 1)
8390; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
8391; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1)
8392; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1)
8393; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], splat (i32 -1)
8394; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]]
8395; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]]
8396; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]]
8397; CHECK-NEXT:    [[TMP20:%.*]] = or <2 x i32> [[TMP17]], [[TMP18]]
8398; CHECK-NEXT:    [[TMP21:%.*]] = or <2 x i32> [[TMP20]], [[TMP19]]
8399; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i32> [[TMP3]], [[TMP4]]
8400; CHECK-NEXT:    store <2 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
8401; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
8402;
8403  %tmp1 = load <2 x i32>, ptr %A
8404  %tmp4 = load <2 x i32>, ptr %B
8405  %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
8406  %tmp5 = or <2 x i32> %tmp3, %tmp4
8407  ret <2 x i32> %tmp5
8408}
8409
8410define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory {
8411; CHECK-LABEL: define <16 x i8> @shr_orr16b(
8412; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8413; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8414; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8415; CHECK-NEXT:    call void @llvm.donothing()
8416; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8417; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8418; CHECK:       3:
8419; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8420; CHECK-NEXT:    unreachable
8421; CHECK:       4:
8422; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
8423; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8424; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8425; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8426; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
8427; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8428; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8429; CHECK:       8:
8430; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8431; CHECK-NEXT:    unreachable
8432; CHECK:       9:
8433; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
8434; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8435; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8436; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8437; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
8438; CHECK-NEXT:    [[TMP13:%.*]] = lshr <16 x i8> [[_MSLD]], splat (i8 1)
8439; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer
8440; CHECK-NEXT:    [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], splat (i8 1)
8441; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], splat (i8 -1)
8442; CHECK-NEXT:    [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], splat (i8 -1)
8443; CHECK-NEXT:    [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]]
8444; CHECK-NEXT:    [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]]
8445; CHECK-NEXT:    [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]]
8446; CHECK-NEXT:    [[TMP20:%.*]] = or <16 x i8> [[TMP17]], [[TMP18]]
8447; CHECK-NEXT:    [[TMP21:%.*]] = or <16 x i8> [[TMP20]], [[TMP19]]
8448; CHECK-NEXT:    [[TMP5:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]]
8449; CHECK-NEXT:    store <16 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
8450; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
8451;
8452  %tmp1 = load <16 x i8>, ptr %A
8453  %tmp4 = load <16 x i8>, ptr %B
8454  %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
8455  %tmp5 = or <16 x i8> %tmp3, %tmp4
8456  ret <16 x i8> %tmp5
8457}
8458
8459define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory {
8460; CHECK-LABEL: define <8 x i16> @shr_orr8h(
8461; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8462; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8463; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8464; CHECK-NEXT:    call void @llvm.donothing()
8465; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8466; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8467; CHECK:       3:
8468; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8469; CHECK-NEXT:    unreachable
8470; CHECK:       4:
8471; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
8472; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8473; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8474; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8475; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
8476; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8477; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8478; CHECK:       8:
8479; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8480; CHECK-NEXT:    unreachable
8481; CHECK:       9:
8482; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
8483; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8484; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8485; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8486; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
8487; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1)
8488; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
8489; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
8490; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], splat (i16 -1)
8491; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], splat (i16 -1)
8492; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]]
8493; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]]
8494; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]]
8495; CHECK-NEXT:    [[TMP20:%.*]] = or <8 x i16> [[TMP17]], [[TMP18]]
8496; CHECK-NEXT:    [[TMP21:%.*]] = or <8 x i16> [[TMP20]], [[TMP19]]
8497; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]]
8498; CHECK-NEXT:    store <8 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
8499; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
8500;
8501  %tmp1 = load <8 x i16>, ptr %A
8502  %tmp4 = load <8 x i16>, ptr %B
8503  %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
8504  %tmp5 = or <8 x i16> %tmp3, %tmp4
8505  ret <8 x i16> %tmp5
8506}
8507
8508define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory {
8509; CHECK-LABEL: define <4 x i32> @shr_orr4s(
8510; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8511; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8512; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8513; CHECK-NEXT:    call void @llvm.donothing()
8514; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8515; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8516; CHECK:       3:
8517; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8518; CHECK-NEXT:    unreachable
8519; CHECK:       4:
8520; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
8521; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8522; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8523; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8524; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
8525; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8526; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8527; CHECK:       8:
8528; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8529; CHECK-NEXT:    unreachable
8530; CHECK:       9:
8531; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
8532; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8533; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8534; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8535; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
8536; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1)
8537; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
8538; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
8539; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -1)
8540; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1)
8541; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]]
8542; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]]
8543; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]]
8544; CHECK-NEXT:    [[TMP20:%.*]] = or <4 x i32> [[TMP17]], [[TMP18]]
8545; CHECK-NEXT:    [[TMP21:%.*]] = or <4 x i32> [[TMP20]], [[TMP19]]
8546; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i32> [[TMP3]], [[TMP4]]
8547; CHECK-NEXT:    store <4 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
8548; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
8549;
8550  %tmp1 = load <4 x i32>, ptr %A
8551  %tmp4 = load <4 x i32>, ptr %B
8552  %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
8553  %tmp5 = or <4 x i32> %tmp3, %tmp4
8554  ret <4 x i32> %tmp5
8555}
8556
8557define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
8558; CHECK-LABEL: define <2 x i64> @shr_orr2d(
8559; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8560; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8561; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8562; CHECK-NEXT:    call void @llvm.donothing()
8563; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8564; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8565; CHECK:       3:
8566; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8567; CHECK-NEXT:    unreachable
8568; CHECK:       4:
8569; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
8570; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8571; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8572; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8573; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
8574; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8575; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8576; CHECK:       8:
8577; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8578; CHECK-NEXT:    unreachable
8579; CHECK:       9:
8580; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
8581; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8582; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8583; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8584; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
8585; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1)
8586; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
8587; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
8588; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -1)
8589; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], splat (i64 -1)
8590; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]]
8591; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]]
8592; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]]
8593; CHECK-NEXT:    [[TMP20:%.*]] = or <2 x i64> [[TMP17]], [[TMP18]]
8594; CHECK-NEXT:    [[TMP21:%.*]] = or <2 x i64> [[TMP20]], [[TMP19]]
8595; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i64> [[TMP3]], [[TMP4]]
8596; CHECK-NEXT:    store <2 x i64> [[TMP21]], ptr @__msan_retval_tls, align 8
8597; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
8598;
8599  %tmp1 = load <2 x i64>, ptr %A
8600  %tmp4 = load <2 x i64>, ptr %B
8601  %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
8602  %tmp5 = or <2 x i64> %tmp3, %tmp4
8603  ret <2 x i64> %tmp5
8604}
8605
8606define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory {
8607; CHECK-LABEL: define <8 x i8> @shl_orr8b(
8608; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8609; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8610; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8611; CHECK-NEXT:    call void @llvm.donothing()
8612; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8613; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8614; CHECK:       3:
8615; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8616; CHECK-NEXT:    unreachable
8617; CHECK:       4:
8618; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
8619; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8620; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8621; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8622; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
8623; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8624; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8625; CHECK:       8:
8626; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8627; CHECK-NEXT:    unreachable
8628; CHECK:       9:
8629; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
8630; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8631; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8632; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8633; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
8634; CHECK-NEXT:    [[TMP13:%.*]] = shl <8 x i8> [[_MSLD]], splat (i8 1)
8635; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
8636; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i8> [[TMP1]], splat (i8 1)
8637; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], splat (i8 -1)
8638; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], splat (i8 -1)
8639; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]]
8640; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]]
8641; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]]
8642; CHECK-NEXT:    [[TMP20:%.*]] = or <8 x i8> [[TMP17]], [[TMP18]]
8643; CHECK-NEXT:    [[TMP21:%.*]] = or <8 x i8> [[TMP20]], [[TMP19]]
8644; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]]
8645; CHECK-NEXT:    store <8 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
8646; CHECK-NEXT:    ret <8 x i8> [[TMP5]]
8647;
8648  %tmp1 = load <8 x i8>, ptr %A
8649  %tmp4 = load <8 x i8>, ptr %B
8650  %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
8651  %tmp5 = or <8 x i8> %tmp3, %tmp4
8652  ret <8 x i8> %tmp5
8653}
8654
8655define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory {
8656; CHECK-LABEL: define <4 x i16> @shl_orr4h(
8657; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8658; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8659; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8660; CHECK-NEXT:    call void @llvm.donothing()
8661; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8662; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8663; CHECK:       3:
8664; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8665; CHECK-NEXT:    unreachable
8666; CHECK:       4:
8667; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
8668; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8669; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8670; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8671; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
8672; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8673; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8674; CHECK:       8:
8675; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8676; CHECK-NEXT:    unreachable
8677; CHECK:       9:
8678; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
8679; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8680; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8681; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8682; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
8683; CHECK-NEXT:    [[TMP13:%.*]] = shl <4 x i16> [[_MSLD]], splat (i16 1)
8684; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
8685; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1)
8686; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], splat (i16 -1)
8687; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], splat (i16 -1)
8688; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]]
8689; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]]
8690; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]]
8691; CHECK-NEXT:    [[TMP20:%.*]] = or <4 x i16> [[TMP17]], [[TMP18]]
8692; CHECK-NEXT:    [[TMP21:%.*]] = or <4 x i16> [[TMP20]], [[TMP19]]
8693; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[TMP4]]
8694; CHECK-NEXT:    store <4 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
8695; CHECK-NEXT:    ret <4 x i16> [[TMP5]]
8696;
8697  %tmp1 = load <4 x i16>, ptr %A
8698  %tmp4 = load <4 x i16>, ptr %B
8699  %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
8700  %tmp5 = or <4 x i16> %tmp3, %tmp4
8701  ret <4 x i16> %tmp5
8702}
8703
8704define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory {
8705; CHECK-LABEL: define <2 x i32> @shl_orr2s(
8706; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8707; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8708; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8709; CHECK-NEXT:    call void @llvm.donothing()
8710; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8711; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8712; CHECK:       3:
8713; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8714; CHECK-NEXT:    unreachable
8715; CHECK:       4:
8716; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
8717; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8718; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8719; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8720; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
8721; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8722; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8723; CHECK:       8:
8724; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8725; CHECK-NEXT:    unreachable
8726; CHECK:       9:
8727; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
8728; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8729; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8730; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8731; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
8732; CHECK-NEXT:    [[TMP13:%.*]] = shl <2 x i32> [[_MSLD]], splat (i32 1)
8733; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
8734; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1)
8735; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1)
8736; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], splat (i32 -1)
8737; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]]
8738; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]]
8739; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]]
8740; CHECK-NEXT:    [[TMP20:%.*]] = or <2 x i32> [[TMP17]], [[TMP18]]
8741; CHECK-NEXT:    [[TMP21:%.*]] = or <2 x i32> [[TMP20]], [[TMP19]]
8742; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i32> [[TMP3]], [[TMP4]]
8743; CHECK-NEXT:    store <2 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
8744; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
8745;
8746  %tmp1 = load <2 x i32>, ptr %A
8747  %tmp4 = load <2 x i32>, ptr %B
8748  %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
8749  %tmp5 = or <2 x i32> %tmp3, %tmp4
8750  ret <2 x i32> %tmp5
8751}
8752
8753define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory {
8754; CHECK-LABEL: define <16 x i8> @shl_orr16b(
8755; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8756; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8757; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8758; CHECK-NEXT:    call void @llvm.donothing()
8759; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8760; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8761; CHECK:       3:
8762; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8763; CHECK-NEXT:    unreachable
8764; CHECK:       4:
8765; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
8766; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8767; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8768; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8769; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
8770; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8771; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8772; CHECK:       8:
8773; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8774; CHECK-NEXT:    unreachable
8775; CHECK:       9:
8776; CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
8777; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8778; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8779; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8780; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
8781; CHECK-NEXT:    [[TMP13:%.*]] = shl <16 x i8> [[_MSLD]], splat (i8 1)
8782; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer
8783; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i8> [[TMP1]], splat (i8 1)
8784; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], splat (i8 -1)
8785; CHECK-NEXT:    [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], splat (i8 -1)
8786; CHECK-NEXT:    [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]]
8787; CHECK-NEXT:    [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]]
8788; CHECK-NEXT:    [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]]
8789; CHECK-NEXT:    [[TMP20:%.*]] = or <16 x i8> [[TMP17]], [[TMP18]]
8790; CHECK-NEXT:    [[TMP21:%.*]] = or <16 x i8> [[TMP20]], [[TMP19]]
8791; CHECK-NEXT:    [[TMP5:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]]
8792; CHECK-NEXT:    store <16 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
8793; CHECK-NEXT:    ret <16 x i8> [[TMP5]]
8794;
8795  %tmp1 = load <16 x i8>, ptr %A
8796  %tmp4 = load <16 x i8>, ptr %B
8797  %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
8798  %tmp5 = or <16 x i8> %tmp3, %tmp4
8799  ret <16 x i8> %tmp5
8800}
8801
8802define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory {
8803; CHECK-LABEL: define <8 x i16> @shl_orr8h(
8804; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8805; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8806; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8807; CHECK-NEXT:    call void @llvm.donothing()
8808; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8809; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8810; CHECK:       3:
8811; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8812; CHECK-NEXT:    unreachable
8813; CHECK:       4:
8814; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
8815; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8816; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8817; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8818; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
8819; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8820; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8821; CHECK:       8:
8822; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8823; CHECK-NEXT:    unreachable
8824; CHECK:       9:
8825; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
8826; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8827; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8828; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8829; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
8830; CHECK-NEXT:    [[TMP13:%.*]] = shl <8 x i16> [[_MSLD]], splat (i16 1)
8831; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
8832; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1)
8833; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], splat (i16 -1)
8834; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], splat (i16 -1)
8835; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]]
8836; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]]
8837; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]]
8838; CHECK-NEXT:    [[TMP20:%.*]] = or <8 x i16> [[TMP17]], [[TMP18]]
8839; CHECK-NEXT:    [[TMP21:%.*]] = or <8 x i16> [[TMP20]], [[TMP19]]
8840; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]]
8841; CHECK-NEXT:    store <8 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
8842; CHECK-NEXT:    ret <8 x i16> [[TMP5]]
8843;
8844  %tmp1 = load <8 x i16>, ptr %A
8845  %tmp4 = load <8 x i16>, ptr %B
8846  %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
8847  %tmp5 = or <8 x i16> %tmp3, %tmp4
8848  ret <8 x i16> %tmp5
8849}
8850
8851define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory {
8852; CHECK-LABEL: define <4 x i32> @shl_orr4s(
8853; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8854; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8855; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8856; CHECK-NEXT:    call void @llvm.donothing()
8857; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8858; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8859; CHECK:       3:
8860; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8861; CHECK-NEXT:    unreachable
8862; CHECK:       4:
8863; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
8864; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8865; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8866; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8867; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
8868; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8869; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8870; CHECK:       8:
8871; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8872; CHECK-NEXT:    unreachable
8873; CHECK:       9:
8874; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
8875; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8876; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8877; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8878; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
8879; CHECK-NEXT:    [[TMP13:%.*]] = shl <4 x i32> [[_MSLD]], splat (i32 1)
8880; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
8881; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1)
8882; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -1)
8883; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1)
8884; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]]
8885; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]]
8886; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]]
8887; CHECK-NEXT:    [[TMP20:%.*]] = or <4 x i32> [[TMP17]], [[TMP18]]
8888; CHECK-NEXT:    [[TMP21:%.*]] = or <4 x i32> [[TMP20]], [[TMP19]]
8889; CHECK-NEXT:    [[TMP5:%.*]] = or <4 x i32> [[TMP3]], [[TMP4]]
8890; CHECK-NEXT:    store <4 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
8891; CHECK-NEXT:    ret <4 x i32> [[TMP5]]
8892;
8893  %tmp1 = load <4 x i32>, ptr %A
8894  %tmp4 = load <4 x i32>, ptr %B
8895  %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
8896  %tmp5 = or <4 x i32> %tmp3, %tmp4
8897  ret <4 x i32> %tmp5
8898}
8899
8900define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
8901; CHECK-LABEL: define <2 x i64> @shl_orr2d(
8902; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8903; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8904; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8905; CHECK-NEXT:    call void @llvm.donothing()
8906; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8907; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8908; CHECK:       3:
8909; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8910; CHECK-NEXT:    unreachable
8911; CHECK:       4:
8912; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
8913; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
8914; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
8915; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
8916; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
8917; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
8918; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
8919; CHECK:       8:
8920; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8921; CHECK-NEXT:    unreachable
8922; CHECK:       9:
8923; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
8924; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
8925; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
8926; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
8927; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
8928; CHECK-NEXT:    [[TMP13:%.*]] = shl <2 x i64> [[_MSLD]], splat (i64 1)
8929; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
8930; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1)
8931; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -1)
8932; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], splat (i64 -1)
8933; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]]
8934; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]]
8935; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]]
8936; CHECK-NEXT:    [[TMP20:%.*]] = or <2 x i64> [[TMP17]], [[TMP18]]
8937; CHECK-NEXT:    [[TMP21:%.*]] = or <2 x i64> [[TMP20]], [[TMP19]]
8938; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i64> [[TMP3]], [[TMP4]]
8939; CHECK-NEXT:    store <2 x i64> [[TMP21]], ptr @__msan_retval_tls, align 8
8940; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
8941;
8942  %tmp1 = load <2 x i64>, ptr %A
8943  %tmp4 = load <2 x i64>, ptr %B
8944  %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
8945  %tmp5 = or <2 x i64> %tmp3, %tmp4
8946  ret <2 x i64> %tmp5
8947}
8948
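; shll and shll_high cover constant left shifts of widened vectors; the CHECK
; lines show the shadow being zext'd (and, for shll_high, shuffled) alongside
; the value and then shifted by the same constant amount.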
8949define <8 x i16> @shll(<8 x i8> %in) sanitize_memory {
8950; CHECK-LABEL: define <8 x i16> @shll(
8951; CHECK-SAME: <8 x i8> [[IN:%.*]]) #[[ATTR3:[0-9]+]] {
8952; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
8953; CHECK-NEXT:    call void @llvm.donothing()
8954; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
8955; CHECK-NEXT:    [[EXT:%.*]] = zext <8 x i8> [[IN]] to <8 x i16>
8956; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 8)
8957; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
8958; CHECK-NEXT:    [[RES:%.*]] = shl <8 x i16> [[EXT]], splat (i16 8)
8959; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
8960; CHECK-NEXT:    ret <8 x i16> [[RES]]
8961;
8962  %ext = zext <8 x i8> %in to <8 x i16>
8963  %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
8964  ret <8 x i16> %res
8965}
8966
8967define <4 x i32> @shll_high(<8 x i16> %in) sanitize_memory {
8968; CHECK-LABEL: define <4 x i32> @shll_high(
8969; CHECK-SAME: <8 x i16> [[IN:%.*]]) #[[ATTR3]] {
8970; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
8971; CHECK-NEXT:    call void @llvm.donothing()
8972; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8973; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i16> [[IN]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8974; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32>
8975; CHECK-NEXT:    [[EXT:%.*]] = zext <4 x i16> [[EXTRACT]] to <4 x i32>
8976; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 16)
8977; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
8978; CHECK-NEXT:    [[RES:%.*]] = shl <4 x i32> [[EXT]], splat (i32 16)
8979; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
8980; CHECK-NEXT:    ret <4 x i32> [[RES]]
8981;
8982  %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
8983  %ext = zext <4 x i16> %extract to <4 x i32>
8984  %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
8985  ret <4 x i32> %res
8986}
8987
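; The sli* tests call the llvm.aarch64.neon.vsli.* intrinsics. The CHECK lines
; show both operand shadows being collapsed and checked (warning on any
; uninitialized bit) and a zero shadow stored for the result, i.e. the
; intrinsic appears to be handled as a fully checked call returning a clean
; value here.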
8988define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory {
8989; CHECK-LABEL: define <8 x i8> @sli8b(
8990; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
8991; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
8992; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
8993; CHECK-NEXT:    call void @llvm.donothing()
8994; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
8995; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
8996; CHECK:       3:
8997; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
8998; CHECK-NEXT:    unreachable
8999; CHECK:       4:
9000; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
9001; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
9002; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
9003; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
9004; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
9005; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
9006; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9007; CHECK:       8:
9008; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9009; CHECK-NEXT:    unreachable
9010; CHECK:       9:
9011; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
9012; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
9013; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
9014; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
9015; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
9016; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64
9017; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
9018; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
9019; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
9020; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
9021; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
9022; CHECK:       15:
9023; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9024; CHECK-NEXT:    unreachable
9025; CHECK:       16:
9026; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 1)
9027; CHECK-NEXT:    store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
9028; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
9029;
9030  %tmp1 = load <8 x i8>, ptr %A
9031  %tmp2 = load <8 x i8>, ptr %B
9032  %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
9033  ret <8 x i8> %tmp3
9034}
9035
9036define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory {
9037; CHECK-LABEL: define <4 x i16> @sli4h(
9038; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
9039; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
9040; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
9041; CHECK-NEXT:    call void @llvm.donothing()
9042; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
9043; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
9044; CHECK:       3:
9045; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9046; CHECK-NEXT:    unreachable
9047; CHECK:       4:
9048; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
9049; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
9050; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
9051; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
9052; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
9053; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
9054; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9055; CHECK:       8:
9056; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9057; CHECK-NEXT:    unreachable
9058; CHECK:       9:
9059; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
9060; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
9061; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
9062; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
9063; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
9064; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64
9065; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
9066; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
9067; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
9068; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
9069; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
9070; CHECK:       15:
9071; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9072; CHECK-NEXT:    unreachable
9073; CHECK:       16:
9074; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]], i32 1)
9075; CHECK-NEXT:    store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
9076; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
9077;
9078  %tmp1 = load <4 x i16>, ptr %A
9079  %tmp2 = load <4 x i16>, ptr %B
9080  %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
9081  ret <4 x i16> %tmp3
9082}
9083
9084define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory {
9085; CHECK-LABEL: define <2 x i32> @sli2s(
9086; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
9087; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
9088; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
9089; CHECK-NEXT:    call void @llvm.donothing()
9090; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
9091; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
9092; CHECK:       3:
9093; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9094; CHECK-NEXT:    unreachable
9095; CHECK:       4:
9096; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
9097; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
9098; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
9099; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
9100; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
9101; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
9102; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9103; CHECK:       8:
9104; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9105; CHECK-NEXT:    unreachable
9106; CHECK:       9:
9107; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
9108; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
9109; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
9110; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
9111; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
9112; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64
9113; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
9114; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
9115; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
9116; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
9117; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
9118; CHECK:       15:
9119; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9120; CHECK-NEXT:    unreachable
9121; CHECK:       16:
9122; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], i32 1)
9123; CHECK-NEXT:    store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
9124; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
9125;
9126  %tmp1 = load <2 x i32>, ptr %A
9127  %tmp2 = load <2 x i32>, ptr %B
9128  %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
9129  ret <2 x i32> %tmp3
9130}
9131
9132define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory {
9133; CHECK-LABEL: define <1 x i64> @sli1d(
9134; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
9135; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
9136; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
9137; CHECK-NEXT:    call void @llvm.donothing()
9138; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
9139; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
9140; CHECK:       3:
9141; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9142; CHECK-NEXT:    unreachable
9143; CHECK:       4:
9144; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
9145; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
9146; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
9147; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
9148; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
9149; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
9150; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9151; CHECK:       8:
9152; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9153; CHECK-NEXT:    unreachable
9154; CHECK:       9:
9155; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
9156; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
9157; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
9158; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
9159; CHECK-NEXT:    [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
9160; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD]] to i64
9161; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
9162; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
9163; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
9164; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
9165; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
9166; CHECK:       15:
9167; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9168; CHECK-NEXT:    unreachable
9169; CHECK:       16:
9170; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]], i32 1)
9171; CHECK-NEXT:    store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
9172; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
9173;
9174  %tmp1 = load <1 x i64>, ptr %A
9175  %tmp2 = load <1 x i64>, ptr %B
9176  %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
9177  ret <1 x i64> %tmp3
9178}
9179
9180define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory {
9181; CHECK-LABEL: define <16 x i8> @sli16b(
9182; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
9183; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
9184; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
9185; CHECK-NEXT:    call void @llvm.donothing()
9186; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
9187; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
9188; CHECK:       3:
9189; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9190; CHECK-NEXT:    unreachable
9191; CHECK:       4:
9192; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
9193; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
9194; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
9195; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
9196; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
9197; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
9198; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9199; CHECK:       8:
9200; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9201; CHECK-NEXT:    unreachable
9202; CHECK:       9:
9203; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
9204; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
9205; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
9206; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
9207; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
9208; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128
9209; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
9210; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
9211; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
9212; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
9213; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
9214; CHECK:       15:
9215; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9216; CHECK-NEXT:    unreachable
9217; CHECK:       16:
9218; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 1)
9219; CHECK-NEXT:    store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
9220; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
9221;
9222  %tmp1 = load <16 x i8>, ptr %A
9223  %tmp2 = load <16 x i8>, ptr %B
9224  %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
9225  ret <16 x i8> %tmp3
9226}
9227
9228define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory {
9229; CHECK-LABEL: define <8 x i16> @sli8h(
9230; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
9231; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
9232; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
9233; CHECK-NEXT:    call void @llvm.donothing()
9234; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
9235; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
9236; CHECK:       3:
9237; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9238; CHECK-NEXT:    unreachable
9239; CHECK:       4:
9240; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
9241; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
9242; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
9243; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
9244; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
9245; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
9246; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9247; CHECK:       8:
9248; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9249; CHECK-NEXT:    unreachable
9250; CHECK:       9:
9251; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
9252; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
9253; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
9254; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
9255; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
9256; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
9257; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
9258; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
9259; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
9260; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
9261; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
9262; CHECK:       15:
9263; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9264; CHECK-NEXT:    unreachable
9265; CHECK:       16:
9266; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 1)
9267; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
9268; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
9269;
9270  %tmp1 = load <8 x i16>, ptr %A
9271  %tmp2 = load <8 x i16>, ptr %B
9272  %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
9273  ret <8 x i16> %tmp3
9274}
9275
9276define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory {
9277; CHECK-LABEL: define <4 x i32> @sli4s(
9278; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
9279; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
9280; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
9281; CHECK-NEXT:    call void @llvm.donothing()
9282; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
9283; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
9284; CHECK:       3:
9285; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9286; CHECK-NEXT:    unreachable
9287; CHECK:       4:
9288; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
9289; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
9290; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
9291; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
9292; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
9293; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
9294; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9295; CHECK:       8:
9296; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9297; CHECK-NEXT:    unreachable
9298; CHECK:       9:
9299; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
9300; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
9301; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
9302; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
9303; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
9304; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
9305; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
9306; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
9307; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
9308; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
9309; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
9310; CHECK:       15:
9311; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9312; CHECK-NEXT:    unreachable
9313; CHECK:       16:
9314; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 1)
9315; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
9316; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
9317;
9318  %tmp1 = load <4 x i32>, ptr %A
9319  %tmp2 = load <4 x i32>, ptr %B
9320  %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
9321  ret <4 x i32> %tmp3
9322}
9323
9324define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory {
9325; CHECK-LABEL: define <2 x i64> @sli2d(
9326; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
9327; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
9328; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
9329; CHECK-NEXT:    call void @llvm.donothing()
9330; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
9331; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
9332; CHECK:       3:
9333; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9334; CHECK-NEXT:    unreachable
9335; CHECK:       4:
9336; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
9337; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
9338; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
9339; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
9340; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
9341; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
9342; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9343; CHECK:       8:
9344; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9345; CHECK-NEXT:    unreachable
9346; CHECK:       9:
9347; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
9348; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
9349; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
9350; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
9351; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
9352; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
9353; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
9354; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
9355; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
9356; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
9357; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
9358; CHECK:       15:
9359; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9360; CHECK-NEXT:    unreachable
9361; CHECK:       16:
9362; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 1)
9363; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
9364; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
9365;
9366  %tmp1 = load <2 x i64>, ptr %A
9367  %tmp2 = load <2 x i64>, ptr %B
9368  %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
9369  ret <2 x i64> %tmp3
9370}
9371
9372declare <8 x i8>  @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
9373declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
9374declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
9375declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
9376
9377declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
9378declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
9379declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
9380declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
9381
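; ashr_v1i64 uses a variable shift amount. The CHECK lines show the shift
; amount's shadow collapsed per lane (icmp ne + sext) and OR'd into the
; shifted shadow of the first operand.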
9382define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) sanitize_memory {
9383; CHECK-LABEL: define <1 x i64> @ashr_v1i64(
9384; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR3]] {
9385; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
9386; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
9387; CHECK-NEXT:    call void @llvm.donothing()
9388; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
9389; CHECK-NEXT:    [[TMP4:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
9390; CHECK-NEXT:    [[TMP5:%.*]] = ashr <1 x i64> [[TMP1]], [[B]]
9391; CHECK-NEXT:    [[TMP6:%.*]] = or <1 x i64> [[TMP5]], [[TMP4]]
9392; CHECK-NEXT:    [[C:%.*]] = ashr <1 x i64> [[A]], [[B]]
9393; CHECK-NEXT:    store <1 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
9394; CHECK-NEXT:    ret <1 x i64> [[C]]
9395;
9396  %c = ashr <1 x i64> %a, %b
9397  ret <1 x i64> %c
9398}
9399
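; The *_zero_shift_amount tests apply saturating/rounding shift intrinsics
; with an all-zero shift vector to the result of a pairwise add. The CHECK
; lines show the shadow being passed through the same intrinsic with a zero
; shift and OR'd with zero before being stored to the destination's shadow.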
9400define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
9401; CHECK-LABEL: define void @sqshl_zero_shift_amount(
9402; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
9403; CHECK-NEXT:  entry:
9404; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
9405; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9406; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
9407; CHECK-NEXT:    call void @llvm.donothing()
9408; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
9409; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
9410; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
9411; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
9412; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
9413; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
9414; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9415; CHECK:       5:
9416; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9417; CHECK-NEXT:    unreachable
9418; CHECK:       6:
9419; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
9420; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
9421; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
9422; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
9423; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
9424; CHECK-NEXT:    ret void
9425;
9426entry:
9427  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
9428  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
9429  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
9430  ret void
9431}
9432
9433define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
9434; CHECK-LABEL: define void @uqshl_zero_shift_amount(
9435; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
9436; CHECK-NEXT:  entry:
9437; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
9438; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
9439; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
9440; CHECK-NEXT:    call void @llvm.donothing()
9441; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
9442; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
9443; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
9444; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
9445; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
9446; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
9447; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
9448; CHECK:       5:
9449; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
9450; CHECK-NEXT:    unreachable
9451; CHECK:       6:
9452; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
9453; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
9454; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
9455; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
9456; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
9457; CHECK-NEXT:    ret void
9458;
9459entry:
9460  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
9461  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
9462  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
9463  ret void
9464}
9465
define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @srshl_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

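; Same zero-shift-amount pattern as above for urshl: the shadow of the addp
; result is run through urshl with a zero shift vector before the
; instrumented store.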
define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @urshl_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

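; sqshlu variant: shadow propagation mirrors the instruction, calling sqshlu
; on the operand shadow with a zero shift vector.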
define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @sqshlu_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

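; sshl variant of the zero-shift-amount pattern; the shadow path again reuses
; the intrinsic on the operand shadow.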
define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @sshl_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

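; ushl variant of the zero-shift-amount pattern.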
define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
; CHECK-LABEL: define void @ushl_zero_shift_amount(
; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
; CHECK-NEXT:    store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
  %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
  store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
  ret void
}

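; rshrn followed by sext: the checks below show the narrowing shift's shadow
; being computed with the same rshrn call on the parameter shadow and then
; sign-extended in step with the result.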
define <4 x i32> @sext_rshrn(<4 x i32> noundef %a) sanitize_memory {
; CHECK-LABEL: define <4 x i32> @sext_rshrn(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP0]], i32 13)
; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 13)
; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i32>
; CHECK-NEXT:    [[VMOVL_I:%.*]] = sext <4 x i16> [[VRSHRN_N1]] to <4 x i32>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[VMOVL_I]]
;
entry:
  %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
  %vmovl.i = sext <4 x i16> %vrshrn_n1 to <4 x i32>
  ret <4 x i32> %vmovl.i
}

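; Same as sext_rshrn, but the widening is a zext, so the shadow is
; zero-extended.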
define <4 x i32> @zext_rshrn(<4 x i32> noundef %a) sanitize_memory {
; CHECK-LABEL: define <4 x i32> @zext_rshrn(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP0]], i32 13)
; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 13)
; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; CHECK-NEXT:    [[VMOVL_I:%.*]] = zext <4 x i16> [[VRSHRN_N1]] to <4 x i32>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[VMOVL_I]]
;
entry:
  %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
  %vmovl.i = zext <4 x i16> %vrshrn_n1 to <4 x i32>
  ret <4 x i32> %vmovl.i
}

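; An add of a splat constant feeds rshrn; the add's shadow (OR with zero)
; flows into the shadow-side rshrn call.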
define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) sanitize_memory {
; CHECK-LABEL: define <4 x i16> @mul_rshrn(
; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer
; CHECK-NEXT:    [[B:%.*]] = add <4 x i32> [[A]], splat (i32 3)
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[_MSPROP]], i32 13)
; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[B]], i32 13)
; CHECK-NEXT:    store <4 x i16> [[TMP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i16> [[VRSHRN_N1]]
;
entry:
  %b = add <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 13)
  ret <4 x i16> %vrshrn_n1
}

declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)
;.
; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
;.