xref: /llvm-project/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll (revision 4a426079d60a664419dfc98855a798c4103fbbf0)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s
;
; Each function in this file feeds one x86 MMX/SSSE3 intrinsic taking
; <1 x i64> operands through the msan pass. The autogenerated assertions pin
; the shadow-propagation code the pass emits, so regenerate them with
; utils/update_test_checks.py instead of editing them by hand.

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
6
; test1: runs llvm.x86.ssse3.phadd.w through the msan pass. Both operands are
; round-tripped through <4 x i16> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; the result shadow to be produced by calling the same phadd.w intrinsic on
; those shadows, and the extracted i64 shadow to be stored to
; @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test1(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test1(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP15]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %2 = bitcast <4 x i16> %1 to <1 x i64>
  %3 = bitcast <4 x i16> %0 to <1 x i64>
  %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
  %5 = bitcast <1 x i64> %4 to <4 x i16>
  %6 = bitcast <4 x i16> %5 to <1 x i64>
  %7 = extractelement <1 x i64> %6, i32 0
  ret i64 %7
}
46
; test88: runs llvm.x86.mmx.pcmpgt.d through the msan pass. Both operands are
; round-tripped through <2 x i32> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test88(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test88(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2:[0-9]+]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
86
; test87: runs llvm.x86.mmx.pcmpgt.w through the msan pass. Both operands are
; round-tripped through <4 x i16> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test87(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test87(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
126
; test86: runs llvm.x86.mmx.pcmpgt.b through the msan pass. Both operands are
; round-tripped through <8 x i8> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test86(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test86(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpgt.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
166
; test85: runs llvm.x86.mmx.pcmpeq.d through the msan pass. Both operands are
; round-tripped through <2 x i32> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test85(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test85(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
206
; test84: runs llvm.x86.mmx.pcmpeq.w through the msan pass. Both operands are
; round-tripped through <4 x i16> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test84(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test84(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
246
; test83: runs llvm.x86.mmx.pcmpeq.b through the msan pass. Both operands are
; round-tripped through <8 x i8> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test83(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test83(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.pcmpeq.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
286
; test82: runs llvm.x86.mmx.punpckldq through the msan pass. Both operands are
; round-tripped through <2 x i32> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test82(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test82(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckldq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
326
; test81: runs llvm.x86.mmx.punpcklwd through the msan pass. Both operands are
; round-tripped through <4 x i16> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test81(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test81(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
366
; test80: runs llvm.x86.mmx.punpcklbw through the msan pass. Both operands are
; round-tripped through <8 x i8> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test80(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test80(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpcklbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
406
; test79: runs llvm.x86.mmx.punpckhdq through the msan pass. Both operands are
; round-tripped through <2 x i32> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test79(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test79(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <2 x i32>
  %1 = bitcast <1 x i64> %a to <2 x i32>
  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhdq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <2 x i32>
  %4 = bitcast <2 x i32> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
446
; test78: runs llvm.x86.mmx.punpckhwd through the msan pass. Both operands are
; round-tripped through <4 x i16> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test78(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test78(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <4 x i16>
  %1 = bitcast <1 x i64> %a to <4 x i16>
  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhwd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <4 x i16>
  %4 = bitcast <4 x i16> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
486
; test77: runs llvm.x86.mmx.punpckhbw through the msan pass. Both operands are
; round-tripped through <8 x i8> bitcasts before the call. The assertions
; below expect the two parameter shadows to be loaded from @__msan_param_tls,
; combined with a bitwise `or` to form the result shadow, and the extracted
; i64 shadow to be stored to @__msan_retval_tls ahead of the return.
declare <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64>, <1 x i64>) nounwind readnone

define i64 @test77(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-LABEL: define i64 @test77(
; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i64 [[TMP13]]
;
entry:
  %0 = bitcast <1 x i64> %b to <8 x i8>
  %1 = bitcast <1 x i64> %a to <8 x i8>
  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  %2 = tail call <1 x i64> @llvm.x86.mmx.punpckhbw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  %3 = bitcast <1 x i64> %2 to <8 x i8>
  %4 = bitcast <8 x i8> %3 to <1 x i64>
  %5 = extractelement <1 x i64> %4, i32 0
  ret i64 %5
}
526
527declare <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64>, <1 x i64>) nounwind readnone
528
; test76: MSan instrumentation of @llvm.x86.mmx.packuswb on two <1 x i64> operands.
; Expected output: both parameter shadows are loaded from @__msan_param_tls,
; viewed as <4 x i16>, and every non-zero lane is widened to all-ones
; (icmp ne + sext). The two widened shadows are then combined with the signed
; pack @llvm.x86.mmx.packsswb to form the result shadow, which is stored to
; @__msan_retval_tls. The application call to packuswb is still emitted.
529define i64 @test76(<1 x i64> %a, <1 x i64> %b) #0 {
530; CHECK-LABEL: define i64 @test76(
531; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
532; CHECK-NEXT:  entry:
533; CHECK-NEXT:    [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
534; CHECK-NEXT:    [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
535; CHECK-NEXT:    call void @llvm.donothing()
536; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16>
537; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
538; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <4 x i16>
539; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
540; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to <1 x i64>
541; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
542; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
543; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
544; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP23]] to <4 x i16>
545; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
546; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer
547; CHECK-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16>
548; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer
549; CHECK-NEXT:    [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16>
550; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
551; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
552; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> [[TMP14]], <1 x i64> [[TMP15]])
553; CHECK-NEXT:    [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
554; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[_MSPROP_VECTOR_PACK]] to <8 x i8>
555; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8>
556; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64>
557; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
558; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
559; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
560; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
561; CHECK-NEXT:    ret i64 [[TMP22]]
562;
563entry:
564  %0 = bitcast <1 x i64> %b to <4 x i16>
565  %1 = bitcast <1 x i64> %a to <4 x i16>
566  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
567  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
568  %2 = tail call <1 x i64> @llvm.x86.mmx.packuswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
569  %3 = bitcast <1 x i64> %2 to <8 x i8>
570  %4 = bitcast <8 x i8> %3 to <1 x i64>
571  %5 = extractelement <1 x i64> %4, i32 0
572  ret i64 %5
573}
574
575declare <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64>, <1 x i64>) nounwind readnone
576
; test75: MSan instrumentation of @llvm.x86.mmx.packssdw on two <1 x i64> operands.
; Expected output: both parameter shadows are viewed as <2 x i32>, every
; non-zero lane is widened to all-ones (icmp ne + sext), and the widened shadows
; are packed with @llvm.x86.mmx.packssdw itself to form the result shadow,
; which is stored to @__msan_retval_tls before the application value is returned.
577define i64 @test75(<1 x i64> %a, <1 x i64> %b) #0 {
578; CHECK-LABEL: define i64 @test75(
579; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
580; CHECK-NEXT:  entry:
581; CHECK-NEXT:    [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
582; CHECK-NEXT:    [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
583; CHECK-NEXT:    call void @llvm.donothing()
584; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <2 x i32>
585; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
586; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <2 x i32>
587; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
588; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
589; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
590; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64>
591; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
592; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP23]] to <2 x i32>
593; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
594; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP8]], zeroinitializer
595; CHECK-NEXT:    [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32>
596; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer
597; CHECK-NEXT:    [[TMP13:%.*]] = sext <2 x i1> [[TMP12]] to <2 x i32>
598; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
599; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64>
600; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> [[TMP14]], <1 x i64> [[TMP15]])
601; CHECK-NEXT:    [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
602; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[_MSPROP_VECTOR_PACK]] to <4 x i16>
603; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP24]] to <4 x i16>
604; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64>
605; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <4 x i16> [[TMP4]] to <1 x i64>
606; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
607; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
608; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
609; CHECK-NEXT:    ret i64 [[TMP22]]
610;
611entry:
612  %0 = bitcast <1 x i64> %b to <2 x i32>
613  %1 = bitcast <1 x i64> %a to <2 x i32>
614  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
615  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
616  %2 = tail call <1 x i64> @llvm.x86.mmx.packssdw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
617  %3 = bitcast <1 x i64> %2 to <4 x i16>
618  %4 = bitcast <4 x i16> %3 to <1 x i64>
619  %5 = extractelement <1 x i64> %4, i32 0
620  ret i64 %5
621}
622
623declare <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64>, <1 x i64>) nounwind readnone
624
; test74: MSan instrumentation of @llvm.x86.mmx.packsswb on two <1 x i64> operands.
; Expected output: both parameter shadows are viewed as <4 x i16>, every
; non-zero lane is widened to all-ones (icmp ne + sext), and the widened shadows
; are packed with @llvm.x86.mmx.packsswb itself to form the result shadow,
; which is stored to @__msan_retval_tls before the application value is returned.
625define i64 @test74(<1 x i64> %a, <1 x i64> %b) #0 {
626; CHECK-LABEL: define i64 @test74(
627; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
628; CHECK-NEXT:  entry:
629; CHECK-NEXT:    [[TMP16:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
630; CHECK-NEXT:    [[TMP17:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
631; CHECK-NEXT:    call void @llvm.donothing()
632; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP16]] to <4 x i16>
633; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
634; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <1 x i64> [[TMP17]] to <4 x i16>
635; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
636; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <4 x i16> [[TMP20]] to <1 x i64>
637; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
638; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
639; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
640; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP23]] to <4 x i16>
641; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
642; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer
643; CHECK-NEXT:    [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i16>
644; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer
645; CHECK-NEXT:    [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16>
646; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
647; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
648; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> [[TMP14]], <1 x i64> [[TMP15]])
649; CHECK-NEXT:    [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
650; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[_MSPROP_VECTOR_PACK]] to <8 x i8>
651; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8>
652; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64>
653; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
654; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
655; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
656; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
657; CHECK-NEXT:    ret i64 [[TMP22]]
658;
659entry:
660  %0 = bitcast <1 x i64> %b to <4 x i16>
661  %1 = bitcast <1 x i64> %a to <4 x i16>
662  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
663  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
664  %2 = tail call <1 x i64> @llvm.x86.mmx.packsswb(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
665  %3 = bitcast <1 x i64> %2 to <8 x i8>
666  %4 = bitcast <8 x i8> %3 to <1 x i64>
667  %5 = extractelement <1 x i64> %4, i32 0
668  ret i64 %5
669}
670
671declare <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64>, i32) nounwind readnone
672
; test73: MSan instrumentation of @llvm.x86.mmx.psrai.d with immediate shift count 3.
; Expected output: the shadow of %a is shifted by calling the same intrinsic
; with the same immediate, the result is OR'ed with zeroinitializer
; (NOTE(review): presumably the shadow contribution of the constant shift
; count), and the final shadow is stored to @__msan_retval_tls.
673define i64 @test73(<1 x i64> %a) #0 {
674; CHECK-LABEL: define i64 @test73(
675; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
676; CHECK-NEXT:  entry:
677; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
678; CHECK-NEXT:    call void @llvm.donothing()
679; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
680; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
681; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64>
682; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
683; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> [[TMP1]], i32 3)
684; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
685; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
686; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32>
687; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
688; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64>
689; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
690; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
691; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
692; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
693; CHECK-NEXT:    ret i64 [[TMP13]]
694;
695entry:
696  %0 = bitcast <1 x i64> %a to <2 x i32>
697  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
698  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.d(<1 x i64> %mmx_var.i, i32 3) nounwind
699  %2 = bitcast <1 x i64> %1 to <2 x i32>
700  %3 = bitcast <2 x i32> %2 to <1 x i64>
701  %4 = extractelement <1 x i64> %3, i32 0
702  ret i64 %4
703}
704
705declare <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64>, i32) nounwind readnone
706
; test72: MSan instrumentation of @llvm.x86.mmx.psrai.w with immediate shift count 3.
; Expected output: the shadow of %a is shifted by calling the same intrinsic
; with the same immediate, the result is OR'ed with zeroinitializer
; (NOTE(review): presumably the shadow contribution of the constant shift
; count), and the final shadow is stored to @__msan_retval_tls.
707define i64 @test72(<1 x i64> %a) #0 {
708; CHECK-LABEL: define i64 @test72(
709; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
710; CHECK-NEXT:  entry:
711; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
712; CHECK-NEXT:    call void @llvm.donothing()
713; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
714; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
715; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
716; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
717; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[TMP1]], i32 3)
718; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
719; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
720; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
721; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
722; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
723; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
724; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
725; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
726; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
727; CHECK-NEXT:    ret i64 [[TMP13]]
728;
729entry:
730  %0 = bitcast <1 x i64> %a to <4 x i16>
731  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
732  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 3) nounwind
733  %2 = bitcast <1 x i64> %1 to <4 x i16>
734  %3 = bitcast <4 x i16> %2 to <1 x i64>
735  %4 = extractelement <1 x i64> %3, i32 0
736  ret i64 %4
737}
738
; test72_2: variant of test72 that calls @llvm.x86.mmx.psrai.w with an
; immediate shift count of 0 instead of 3.
; Expected output: same shape as test72; the shadow of %a goes through the
; intrinsic with "i32 0", is OR'ed with zeroinitializer, and is stored to
; @__msan_retval_tls.
739define i64 @test72_2(<1 x i64> %a) #0 {
740; CHECK-LABEL: define i64 @test72_2(
741; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
742; CHECK-NEXT:  entry:
743; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
744; CHECK-NEXT:    call void @llvm.donothing()
745; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
746; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
747; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
748; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
749; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[TMP1]], i32 0)
750; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
751; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> [[MMX_VAR_I]], i32 0) #[[ATTR2]]
752; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
753; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
754; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
755; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
756; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
757; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
758; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
759; CHECK-NEXT:    ret i64 [[TMP13]]
760;
761entry:
762  %0 = bitcast <1 x i64> %a to <4 x i16>
763  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
764  %1 = tail call <1 x i64> @llvm.x86.mmx.psrai.w(<1 x i64> %mmx_var.i, i32 0) nounwind
765  %2 = bitcast <1 x i64> %1 to <4 x i16>
766  %3 = bitcast <4 x i16> %2 to <1 x i64>
767  %4 = extractelement <1 x i64> %3, i32 0
768  ret i64 %4
769}
770
771declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone
772
; test71: MSan instrumentation of @llvm.x86.mmx.psrli.q with immediate shift count 3.
; The input reaches the intrinsic via extractelement + bitcast i64 -> <1 x i64>,
; and the expected output mirrors those two steps on the shadow. The shadow is
; then shifted with the same intrinsic and immediate, OR'ed with
; zeroinitializer, bitcast to i64 and stored to @__msan_retval_tls.
773define i64 @test71(<1 x i64> %a) #0 {
774; CHECK-LABEL: define i64 @test71(
775; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
776; CHECK-NEXT:  entry:
777; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
778; CHECK-NEXT:    call void @llvm.donothing()
779; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
780; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
781; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
782; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
783; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> [[TMP2]], i32 3)
784; CHECK-NEXT:    [[TMP6:%.*]] = or <1 x i64> [[TMP3]], zeroinitializer
785; CHECK-NEXT:    [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
786; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to i64
787; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
788; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
789; CHECK-NEXT:    ret i64 [[TMP4]]
790;
791entry:
792  %0 = extractelement <1 x i64> %a, i32 0
793  %mmx_var.i = bitcast i64 %0 to <1 x i64>
794  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %mmx_var.i, i32 3) nounwind
795  %2 = bitcast <1 x i64> %1 to i64
796  ret i64 %2
797}
798
799declare <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64>, i32) nounwind readnone
800
; test70: MSan instrumentation of @llvm.x86.mmx.psrli.d with immediate shift count 3.
; Expected output: the shadow of %a is shifted by calling the same intrinsic
; with the same immediate, the result is OR'ed with zeroinitializer
; (NOTE(review): presumably the shadow contribution of the constant shift
; count), and the final shadow is stored to @__msan_retval_tls.
801define i64 @test70(<1 x i64> %a) #0 {
802; CHECK-LABEL: define i64 @test70(
803; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
804; CHECK-NEXT:  entry:
805; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
806; CHECK-NEXT:    call void @llvm.donothing()
807; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
808; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
809; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64>
810; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
811; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> [[TMP1]], i32 3)
812; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
813; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
814; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32>
815; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
816; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64>
817; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
818; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
819; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
820; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
821; CHECK-NEXT:    ret i64 [[TMP13]]
822;
823entry:
824  %0 = bitcast <1 x i64> %a to <2 x i32>
825  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
826  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 3) nounwind
827  %2 = bitcast <1 x i64> %1 to <2 x i32>
828  %3 = bitcast <2 x i32> %2 to <1 x i64>
829  %4 = extractelement <1 x i64> %3, i32 0
830  ret i64 %4
831}
832
; test70_2: variant of test70 that calls @llvm.x86.mmx.psrli.d with an
; immediate shift count of 0 instead of 3.
; Expected output: same shape as test70; the shadow of %a goes through the
; intrinsic with "i32 0", is OR'ed with zeroinitializer, and is stored to
; @__msan_retval_tls.
833define i64 @test70_2(<1 x i64> %a) #0 {
834; CHECK-LABEL: define i64 @test70_2(
835; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
836; CHECK-NEXT:  entry:
837; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
838; CHECK-NEXT:    call void @llvm.donothing()
839; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
840; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
841; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64>
842; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
843; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> [[TMP1]], i32 0)
844; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
845; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> [[MMX_VAR_I]], i32 0) #[[ATTR2]]
846; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32>
847; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
848; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64>
849; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
850; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
851; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
852; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
853; CHECK-NEXT:    ret i64 [[TMP13]]
854;
855entry:
856  %0 = bitcast <1 x i64> %a to <2 x i32>
857  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
858  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.d(<1 x i64> %mmx_var.i, i32 0) nounwind
859  %2 = bitcast <1 x i64> %1 to <2 x i32>
860  %3 = bitcast <2 x i32> %2 to <1 x i64>
861  %4 = extractelement <1 x i64> %3, i32 0
862  ret i64 %4
863}
864
865declare <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64>, i32) nounwind readnone
866
; test69: MSan instrumentation of @llvm.x86.mmx.psrli.w with immediate shift count 3.
; Expected output: the shadow of %a is shifted by calling the same intrinsic
; with the same immediate, the result is OR'ed with zeroinitializer
; (NOTE(review): presumably the shadow contribution of the constant shift
; count), and the final shadow is stored to @__msan_retval_tls.
867define i64 @test69(<1 x i64> %a) #0 {
868; CHECK-LABEL: define i64 @test69(
869; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
870; CHECK-NEXT:  entry:
871; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
872; CHECK-NEXT:    call void @llvm.donothing()
873; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
874; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
875; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
876; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
877; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> [[TMP1]], i32 3)
878; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
879; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
880; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
881; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
882; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
883; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
884; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
885; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
886; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
887; CHECK-NEXT:    ret i64 [[TMP13]]
888;
889entry:
890  %0 = bitcast <1 x i64> %a to <4 x i16>
891  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
892  %1 = tail call <1 x i64> @llvm.x86.mmx.psrli.w(<1 x i64> %mmx_var.i, i32 3) nounwind
893  %2 = bitcast <1 x i64> %1 to <4 x i16>
894  %3 = bitcast <4 x i16> %2 to <1 x i64>
895  %4 = extractelement <1 x i64> %3, i32 0
896  ret i64 %4
897}
898
899declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone
900
; test68: MSan instrumentation of @llvm.x86.mmx.pslli.q with immediate shift count 3.
; The input reaches the intrinsic via extractelement + bitcast i64 -> <1 x i64>,
; and the expected output mirrors those two steps on the shadow. The shadow is
; then shifted with the same intrinsic and immediate, OR'ed with
; zeroinitializer, bitcast to i64 and stored to @__msan_retval_tls.
901define i64 @test68(<1 x i64> %a) #0 {
902; CHECK-LABEL: define i64 @test68(
903; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
904; CHECK-NEXT:  entry:
905; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
906; CHECK-NEXT:    call void @llvm.donothing()
907; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
908; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
909; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
910; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
911; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> [[TMP2]], i32 3)
912; CHECK-NEXT:    [[TMP6:%.*]] = or <1 x i64> [[TMP3]], zeroinitializer
913; CHECK-NEXT:    [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
914; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to i64
915; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
916; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
917; CHECK-NEXT:    ret i64 [[TMP4]]
918;
919entry:
920  %0 = extractelement <1 x i64> %a, i32 0
921  %mmx_var.i = bitcast i64 %0 to <1 x i64>
922  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %mmx_var.i, i32 3) nounwind
923  %2 = bitcast <1 x i64> %1 to i64
924  ret i64 %2
925}
926
927declare <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64>, i32) nounwind readnone
928
; test67: MSan instrumentation of @llvm.x86.mmx.pslli.d with immediate shift count 3.
; Expected output: the shadow of %a is shifted by calling the same intrinsic
; with the same immediate, the result is OR'ed with zeroinitializer
; (NOTE(review): presumably the shadow contribution of the constant shift
; count), and the final shadow is stored to @__msan_retval_tls.
929define i64 @test67(<1 x i64> %a) #0 {
930; CHECK-LABEL: define i64 @test67(
931; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
932; CHECK-NEXT:  entry:
933; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
934; CHECK-NEXT:    call void @llvm.donothing()
935; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
936; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
937; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64>
938; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
939; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> [[TMP1]], i32 3)
940; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
941; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
942; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32>
943; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
944; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP10]] to <1 x i64>
945; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
946; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
947; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
948; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
949; CHECK-NEXT:    ret i64 [[TMP13]]
950;
951entry:
952  %0 = bitcast <1 x i64> %a to <2 x i32>
953  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
954  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.d(<1 x i64> %mmx_var.i, i32 3) nounwind
955  %2 = bitcast <1 x i64> %1 to <2 x i32>
956  %3 = bitcast <2 x i32> %2 to <1 x i64>
957  %4 = extractelement <1 x i64> %3, i32 0
958  ret i64 %4
959}
960
961declare <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64>, i32) nounwind readnone
962
; test66: MSan instrumentation of @llvm.x86.mmx.pslli.w with immediate shift count 3.
; Expected output: the shadow of %a is shifted by calling the same intrinsic
; with the same immediate, the result is OR'ed with zeroinitializer
; (NOTE(review): presumably the shadow contribution of the constant shift
; count), and the final shadow is stored to @__msan_retval_tls.
963define i64 @test66(<1 x i64> %a) #0 {
964; CHECK-LABEL: define i64 @test66(
965; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
966; CHECK-NEXT:  entry:
967; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
968; CHECK-NEXT:    call void @llvm.donothing()
969; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
970; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
971; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
972; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
973; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[TMP1]], i32 3)
974; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
975; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[MMX_VAR_I]], i32 3) #[[ATTR2]]
976; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
977; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
978; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
979; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
980; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
981; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
982; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
983; CHECK-NEXT:    ret i64 [[TMP13]]
984;
985entry:
986  %0 = bitcast <1 x i64> %a to <4 x i16>
987  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
988  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 3) nounwind
989  %2 = bitcast <1 x i64> %1 to <4 x i16>
990  %3 = bitcast <4 x i16> %2 to <1 x i64>
991  %4 = extractelement <1 x i64> %3, i32 0
992  ret i64 %4
993}
994
; test66_2: variant of test66 that calls @llvm.x86.mmx.pslli.w with an
; immediate shift count of 0 instead of 3.
; Expected output: same shape as test66; the shadow of %a goes through the
; intrinsic with "i32 0", is OR'ed with zeroinitializer, and is stored to
; @__msan_retval_tls.
995define i64 @test66_2(<1 x i64> %a) #0 {
996; CHECK-LABEL: define i64 @test66_2(
997; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
998; CHECK-NEXT:  entry:
999; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1000; CHECK-NEXT:    call void @llvm.donothing()
1001; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
1002; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
1003; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
1004; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1005; CHECK-NEXT:    [[TMP2:%.*]] = call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[TMP1]], i32 0)
1006; CHECK-NEXT:    [[TMP11:%.*]] = or <1 x i64> [[TMP2]], zeroinitializer
1007; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> [[MMX_VAR_I]], i32 0) #[[ATTR2]]
1008; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
1009; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
1010; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i16> [[TMP10]] to <1 x i64>
1011; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
1012; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
1013; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1014; CHECK-NEXT:    store i64 [[TMP6]], ptr @__msan_retval_tls, align 8
1015; CHECK-NEXT:    ret i64 [[TMP13]]
1016;
1017entry:
1018  %0 = bitcast <1 x i64> %a to <4 x i16>
1019  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
1020  %1 = tail call <1 x i64> @llvm.x86.mmx.pslli.w(<1 x i64> %mmx_var.i, i32 0) nounwind
1021  %2 = bitcast <1 x i64> %1 to <4 x i16>
1022  %3 = bitcast <4 x i16> %2 to <1 x i64>
1023  %4 = extractelement <1 x i64> %3, i32 0
1024  ret i64 %4
1025}
1026
1027declare <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64>, <1 x i64>) nounwind readnone
1028
; test65: MSan instrumentation of @llvm.x86.mmx.psra.d with a variable shift
; count taken from %b (both operands are <1 x i64>).
; Expected output:
;  - the shadow of %b is bitcast to i64, compared against 0 and sign-extended,
;    giving an all-ones or all-zero <1 x i64> mask;
;  - the shadow of %a is shifted by calling psra.d with the application shift
;    count (not the shift count's shadow);
;  - the two are OR'ed to form the result shadow, which is stored to
;    @__msan_retval_tls before the application value is returned.
1029define i64 @test65(<1 x i64> %a, <1 x i64> %b) #0 {
1030; CHECK-LABEL: define i64 @test65(
1031; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1032; CHECK-NEXT:  entry:
1033; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1034; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1035; CHECK-NEXT:    call void @llvm.donothing()
1036; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
1037; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
1038; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
1039; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
1040; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
1041; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
1042; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
1043; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
1044; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
1045; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
1046; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
1047; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
1048; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
1049; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
1050; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1051; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
1052; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <2 x i32>
1053; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64>
1054; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64>
1055; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
1056; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
1057; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
1058; CHECK-NEXT:    ret i64 [[TMP17]]
1059;
1060entry:
1061  %0 = bitcast <1 x i64> %a to <2 x i32>
1062  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
1063  %1 = extractelement <1 x i64> %b, i32 0
1064  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1065  %2 = tail call <1 x i64> @llvm.x86.mmx.psra.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1066  %3 = bitcast <1 x i64> %2 to <2 x i32>
1067  %4 = bitcast <2 x i32> %3 to <1 x i64>
1068  %5 = extractelement <1 x i64> %4, i32 0
1069  ret i64 %5
1070}
1071
1072declare <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64>, <1 x i64>) nounwind readnone
1073
; test64: checks the MSan instrumentation of @llvm.x86.mmx.psra.w.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%a at offset 0,
;     %b at offset 8);
;   * the shadow of the second operand is collapsed to all-ones when any of
;     its bits is set (icmp ne 0 + sext);
;   * the same intrinsic is applied to the shadow of the first operand, using
;     the real second operand, and the result is OR'd with the collapsed
;     shadow;
;   * the resulting shadow is stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1074define i64 @test64(<1 x i64> %a, <1 x i64> %b) #0 {
1075; CHECK-LABEL: define i64 @test64(
1076; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1077; CHECK-NEXT:  entry:
1078; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1079; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1080; CHECK-NEXT:    call void @llvm.donothing()
1081; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
1082; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
1083; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
1084; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1085; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
1086; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
1087; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
1088; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
1089; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
1090; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
1091; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
1092; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
1093; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
1094; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
1095; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1096; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16>
1097; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <4 x i16>
1098; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64>
1099; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64>
1100; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
1101; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
1102; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
1103; CHECK-NEXT:    ret i64 [[TMP17]]
1104;
1105entry:
  ; First operand: %a round-tripped through <4 x i16> back to <1 x i64>.
1106  %0 = bitcast <1 x i64> %a to <4 x i16>
1107  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  ; Second operand: rebuilt from element 0 of %b.
1108  %1 = extractelement <1 x i64> %b, i32 0
1109  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1110  %2 = tail call <1 x i64> @llvm.x86.mmx.psra.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; Unpack the intrinsic result to the scalar i64 return value.
1111  %3 = bitcast <1 x i64> %2 to <4 x i16>
1112  %4 = bitcast <4 x i16> %3 to <1 x i64>
1113  %5 = extractelement <1 x i64> %4, i32 0
1114  ret i64 %5
1115}
1116
1117declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone
1118
; test63: checks the MSan instrumentation of @llvm.x86.mmx.psrl.q.
; Both operands are rebuilt from element 0 of the <1 x i64> arguments.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%a at offset 0,
;     %b at offset 8);
;   * the shadow of the second operand is collapsed to all-ones when any of
;     its bits is set (icmp ne 0 + sext);
;   * the same intrinsic is applied to the shadow of the first operand, using
;     the real second operand, and the result is OR'd with the collapsed
;     shadow;
;   * the resulting shadow is bitcast to i64 and stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1119define i64 @test63(<1 x i64> %a, <1 x i64> %b) #0 {
1120; CHECK-LABEL: define i64 @test63(
1121; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1122; CHECK-NEXT:  entry:
1123; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1124; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1125; CHECK-NEXT:    call void @llvm.donothing()
1126; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0
1127; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
1128; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
1129; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
1130; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0
1131; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
1132; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
1133; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
1134; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP13]] to i64
1135; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i64 [[TMP6]], 0
1136; CHECK-NEXT:    [[TMP10:%.*]] = sext i1 [[TMP9]] to i64
1137; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i64 [[TMP10]] to <1 x i64>
1138; CHECK-NEXT:    [[TMP15:%.*]] = call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> [[TMP3]], <1 x i64> [[MMX_VAR1_I]])
1139; CHECK-NEXT:    [[TMP16:%.*]] = or <1 x i64> [[TMP15]], [[TMP14]]
1140; CHECK-NEXT:    [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1141; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP16]] to i64
1142; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP12]] to i64
1143; CHECK-NEXT:    store i64 [[TMP11]], ptr @__msan_retval_tls, align 8
1144; CHECK-NEXT:    ret i64 [[TMP5]]
1145;
1146entry:
  ; Both operands are rebuilt from element 0 of their <1 x i64> argument.
1147  %0 = extractelement <1 x i64> %a, i32 0
1148  %mmx_var.i = bitcast i64 %0 to <1 x i64>
1149  %1 = extractelement <1 x i64> %b, i32 0
1150  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1151  %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; The result is returned as a scalar via a direct <1 x i64> -> i64 bitcast.
1152  %3 = bitcast <1 x i64> %2 to i64
1153  ret i64 %3
1154}
1155
1156declare <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64>, <1 x i64>) nounwind readnone
1157
; test62: checks the MSan instrumentation of @llvm.x86.mmx.psrl.d.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%a at offset 0,
;     %b at offset 8);
;   * the shadow of the second operand is collapsed to all-ones when any of
;     its bits is set (icmp ne 0 + sext);
;   * the same intrinsic is applied to the shadow of the first operand, using
;     the real second operand, and the result is OR'd with the collapsed
;     shadow;
;   * the resulting shadow is stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1158define i64 @test62(<1 x i64> %a, <1 x i64> %b) #0 {
1159; CHECK-LABEL: define i64 @test62(
1160; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1161; CHECK-NEXT:  entry:
1162; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1163; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1164; CHECK-NEXT:    call void @llvm.donothing()
1165; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
1166; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
1167; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
1168; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
1169; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
1170; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
1171; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
1172; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
1173; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
1174; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
1175; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
1176; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
1177; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
1178; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
1179; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1180; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
1181; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <2 x i32>
1182; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64>
1183; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64>
1184; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
1185; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
1186; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
1187; CHECK-NEXT:    ret i64 [[TMP17]]
1188;
1189entry:
  ; First operand: %a round-tripped through <2 x i32> back to <1 x i64>.
1190  %0 = bitcast <1 x i64> %a to <2 x i32>
1191  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  ; Second operand: rebuilt from element 0 of %b.
1192  %1 = extractelement <1 x i64> %b, i32 0
1193  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1194  %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; Unpack the intrinsic result to the scalar i64 return value.
1195  %3 = bitcast <1 x i64> %2 to <2 x i32>
1196  %4 = bitcast <2 x i32> %3 to <1 x i64>
1197  %5 = extractelement <1 x i64> %4, i32 0
1198  ret i64 %5
1199}
1200
1201declare <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64>, <1 x i64>) nounwind readnone
1202
; test61: checks the MSan instrumentation of @llvm.x86.mmx.psrl.w.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%a at offset 0,
;     %b at offset 8);
;   * the shadow of the second operand is collapsed to all-ones when any of
;     its bits is set (icmp ne 0 + sext);
;   * the same intrinsic is applied to the shadow of the first operand, using
;     the real second operand, and the result is OR'd with the collapsed
;     shadow;
;   * the resulting shadow is stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1203define i64 @test61(<1 x i64> %a, <1 x i64> %b) #0 {
1204; CHECK-LABEL: define i64 @test61(
1205; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1206; CHECK-NEXT:  entry:
1207; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1208; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1209; CHECK-NEXT:    call void @llvm.donothing()
1210; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
1211; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
1212; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
1213; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1214; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
1215; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
1216; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
1217; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
1218; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
1219; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
1220; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
1221; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
1222; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
1223; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
1224; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1225; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16>
1226; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <4 x i16>
1227; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64>
1228; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64>
1229; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
1230; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
1231; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
1232; CHECK-NEXT:    ret i64 [[TMP17]]
1233;
1234entry:
  ; First operand: %a round-tripped through <4 x i16> back to <1 x i64>.
1235  %0 = bitcast <1 x i64> %a to <4 x i16>
1236  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  ; Second operand: rebuilt from element 0 of %b.
1237  %1 = extractelement <1 x i64> %b, i32 0
1238  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1239  %2 = tail call <1 x i64> @llvm.x86.mmx.psrl.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; Unpack the intrinsic result to the scalar i64 return value.
1240  %3 = bitcast <1 x i64> %2 to <4 x i16>
1241  %4 = bitcast <4 x i16> %3 to <1 x i64>
1242  %5 = extractelement <1 x i64> %4, i32 0
1243  ret i64 %5
1244}
1245
1246declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone
1247
; test60: checks the MSan instrumentation of @llvm.x86.mmx.psll.q.
; Both operands are rebuilt from element 0 of the <1 x i64> arguments.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%a at offset 0,
;     %b at offset 8);
;   * the shadow of the second operand is collapsed to all-ones when any of
;     its bits is set (icmp ne 0 + sext);
;   * the same intrinsic is applied to the shadow of the first operand, using
;     the real second operand, and the result is OR'd with the collapsed
;     shadow;
;   * the resulting shadow is bitcast to i64 and stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1248define i64 @test60(<1 x i64> %a, <1 x i64> %b) #0 {
1249; CHECK-LABEL: define i64 @test60(
1250; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1251; CHECK-NEXT:  entry:
1252; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1253; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1254; CHECK-NEXT:    call void @llvm.donothing()
1255; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0
1256; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
1257; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
1258; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
1259; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP8]], i32 0
1260; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
1261; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
1262; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
1263; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP13]] to i64
1264; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i64 [[TMP6]], 0
1265; CHECK-NEXT:    [[TMP10:%.*]] = sext i1 [[TMP9]] to i64
1266; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i64 [[TMP10]] to <1 x i64>
1267; CHECK-NEXT:    [[TMP15:%.*]] = call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> [[TMP3]], <1 x i64> [[MMX_VAR1_I]])
1268; CHECK-NEXT:    [[TMP16:%.*]] = or <1 x i64> [[TMP15]], [[TMP14]]
1269; CHECK-NEXT:    [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1270; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP16]] to i64
1271; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP12]] to i64
1272; CHECK-NEXT:    store i64 [[TMP11]], ptr @__msan_retval_tls, align 8
1273; CHECK-NEXT:    ret i64 [[TMP5]]
1274;
1275entry:
  ; Both operands are rebuilt from element 0 of their <1 x i64> argument.
1276  %0 = extractelement <1 x i64> %a, i32 0
1277  %mmx_var.i = bitcast i64 %0 to <1 x i64>
1278  %1 = extractelement <1 x i64> %b, i32 0
1279  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1280  %2 = tail call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; The result is returned as a scalar via a direct <1 x i64> -> i64 bitcast.
1281  %3 = bitcast <1 x i64> %2 to i64
1282  ret i64 %3
1283}
1284
1285declare <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64>, <1 x i64>) nounwind readnone
1286
; test59: checks the MSan instrumentation of @llvm.x86.mmx.psll.d.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%a at offset 0,
;     %b at offset 8);
;   * the shadow of the second operand is collapsed to all-ones when any of
;     its bits is set (icmp ne 0 + sext);
;   * the same intrinsic is applied to the shadow of the first operand, using
;     the real second operand, and the result is OR'd with the collapsed
;     shadow;
;   * the resulting shadow is stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1287define i64 @test59(<1 x i64> %a, <1 x i64> %b) #0 {
1288; CHECK-LABEL: define i64 @test59(
1289; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1290; CHECK-NEXT:  entry:
1291; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1292; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1293; CHECK-NEXT:    call void @llvm.donothing()
1294; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
1295; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
1296; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
1297; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
1298; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
1299; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
1300; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
1301; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
1302; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
1303; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
1304; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
1305; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
1306; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
1307; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
1308; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1309; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
1310; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <2 x i32>
1311; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP18]] to <1 x i64>
1312; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP5]] to <1 x i64>
1313; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
1314; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
1315; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
1316; CHECK-NEXT:    ret i64 [[TMP17]]
1317;
1318entry:
  ; First operand: %a round-tripped through <2 x i32> back to <1 x i64>.
1319  %0 = bitcast <1 x i64> %a to <2 x i32>
1320  %mmx_var.i = bitcast <2 x i32> %0 to <1 x i64>
  ; Second operand: rebuilt from element 0 of %b.
1321  %1 = extractelement <1 x i64> %b, i32 0
1322  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1323  %2 = tail call <1 x i64> @llvm.x86.mmx.psll.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; Unpack the intrinsic result to the scalar i64 return value.
1324  %3 = bitcast <1 x i64> %2 to <2 x i32>
1325  %4 = bitcast <2 x i32> %3 to <1 x i64>
1326  %5 = extractelement <1 x i64> %4, i32 0
1327  ret i64 %5
1328}
1329
1330declare <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64>, <1 x i64>) nounwind readnone
1331
; test58: checks the MSan instrumentation of @llvm.x86.mmx.psll.w.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%a at offset 0,
;     %b at offset 8);
;   * the shadow of the second operand is collapsed to all-ones when any of
;     its bits is set (icmp ne 0 + sext);
;   * the same intrinsic is applied to the shadow of the first operand, using
;     the real second operand, and the result is OR'd with the collapsed
;     shadow;
;   * the resulting shadow is stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1332define i64 @test58(<1 x i64> %a, <1 x i64> %b) #0 {
1333; CHECK-LABEL: define i64 @test58(
1334; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1335; CHECK-NEXT:  entry:
1336; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1337; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1338; CHECK-NEXT:    call void @llvm.donothing()
1339; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
1340; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
1341; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
1342; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1343; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP10]], i32 0
1344; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
1345; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
1346; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
1347; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP8]] to i64
1348; CHECK-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP15]], 0
1349; CHECK-NEXT:    [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
1350; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i64 [[TMP14]] to <1 x i64>
1351; CHECK-NEXT:    [[TMP20:%.*]] = call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> [[TMP4]], <1 x i64> [[MMX_VAR1_I]])
1352; CHECK-NEXT:    [[TMP12:%.*]] = or <1 x i64> [[TMP20]], [[TMP19]]
1353; CHECK-NEXT:    [[TMP21:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1354; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16>
1355; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP21]] to <4 x i16>
1356; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP18]] to <1 x i64>
1357; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64>
1358; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
1359; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <1 x i64> [[TMP16]], i32 0
1360; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
1361; CHECK-NEXT:    ret i64 [[TMP17]]
1362;
1363entry:
  ; First operand: %a round-tripped through <4 x i16> back to <1 x i64>.
1364  %0 = bitcast <1 x i64> %a to <4 x i16>
1365  %mmx_var.i = bitcast <4 x i16> %0 to <1 x i64>
  ; Second operand: rebuilt from element 0 of %b.
1366  %1 = extractelement <1 x i64> %b, i32 0
1367  %mmx_var1.i = bitcast i64 %1 to <1 x i64>
1368  %2 = tail call <1 x i64> @llvm.x86.mmx.psll.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; Unpack the intrinsic result to the scalar i64 return value.
1369  %3 = bitcast <1 x i64> %2 to <4 x i16>
1370  %4 = bitcast <4 x i16> %3 to <1 x i64>
1371  %5 = extractelement <1 x i64> %4, i32 0
1372  ret i64 %5
1373}
1374
1375declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone
1376
; test56: checks the MSan instrumentation of @llvm.x86.mmx.pxor.
; Both operands are round-tripped through <2 x i32> before the call.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%b at offset 8
;     first, then %a at offset 0);
;   * the two operand shadows are OR'd together into _MSPROP; no shadow copy
;     of the intrinsic call is emitted;
;   * the resulting shadow is stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1377define i64 @test56(<1 x i64> %a, <1 x i64> %b) #0 {
1378; CHECK-LABEL: define i64 @test56(
1379; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1380; CHECK-NEXT:  entry:
1381; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1382; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1383; CHECK-NEXT:    call void @llvm.donothing()
1384; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
1385; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
1386; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
1387; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
1388; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
1389; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
1390; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
1391; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
1392; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1393; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1394; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
1395; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
1396; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
1397; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
1398; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1399; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1400; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1401; CHECK-NEXT:    ret i64 [[TMP13]]
1402;
1403entry:
  ; %b is bitcast before %a, but %a is still passed as the first operand.
1404  %0 = bitcast <1 x i64> %b to <2 x i32>
1405  %1 = bitcast <1 x i64> %a to <2 x i32>
1406  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
1407  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
1408  %2 = tail call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; Unpack the intrinsic result to the scalar i64 return value.
1409  %3 = bitcast <1 x i64> %2 to <2 x i32>
1410  %4 = bitcast <2 x i32> %3 to <1 x i64>
1411  %5 = extractelement <1 x i64> %4, i32 0
1412  ret i64 %5
1413}
1414
1415declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone
1416
; test55: checks the MSan instrumentation of @llvm.x86.mmx.por.
; Both operands are round-tripped through <2 x i32> before the call.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%b at offset 8
;     first, then %a at offset 0);
;   * the two operand shadows are OR'd together into _MSPROP; no shadow copy
;     of the intrinsic call is emitted;
;   * the resulting shadow is stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1417define i64 @test55(<1 x i64> %a, <1 x i64> %b) #0 {
1418; CHECK-LABEL: define i64 @test55(
1419; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1420; CHECK-NEXT:  entry:
1421; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1422; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1423; CHECK-NEXT:    call void @llvm.donothing()
1424; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
1425; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
1426; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
1427; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
1428; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
1429; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
1430; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
1431; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
1432; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1433; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.por(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1434; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
1435; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
1436; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
1437; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
1438; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1439; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1440; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1441; CHECK-NEXT:    ret i64 [[TMP13]]
1442;
1443entry:
  ; %b is bitcast before %a, but %a is still passed as the first operand.
1444  %0 = bitcast <1 x i64> %b to <2 x i32>
1445  %1 = bitcast <1 x i64> %a to <2 x i32>
1446  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
1447  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
1448  %2 = tail call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; Unpack the intrinsic result to the scalar i64 return value.
1449  %3 = bitcast <1 x i64> %2 to <2 x i32>
1450  %4 = bitcast <2 x i32> %3 to <1 x i64>
1451  %5 = extractelement <1 x i64> %4, i32 0
1452  ret i64 %5
1453}
1454
1455declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone
1456
; test54: checks the MSan instrumentation of @llvm.x86.mmx.pandn.
; Both operands are round-tripped through <2 x i32> before the call.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%b at offset 8
;     first, then %a at offset 0);
;   * the two operand shadows are OR'd together into _MSPROP; no shadow copy
;     of the intrinsic call is emitted;
;   * the resulting shadow is stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1457define i64 @test54(<1 x i64> %a, <1 x i64> %b) #0 {
1458; CHECK-LABEL: define i64 @test54(
1459; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1460; CHECK-NEXT:  entry:
1461; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1462; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1463; CHECK-NEXT:    call void @llvm.donothing()
1464; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
1465; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
1466; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
1467; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
1468; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
1469; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
1470; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
1471; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
1472; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1473; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1474; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
1475; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
1476; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
1477; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
1478; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1479; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1480; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1481; CHECK-NEXT:    ret i64 [[TMP13]]
1482;
1483entry:
  ; %b is bitcast before %a, but %a is still passed as the first operand.
1484  %0 = bitcast <1 x i64> %b to <2 x i32>
1485  %1 = bitcast <1 x i64> %a to <2 x i32>
1486  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
1487  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
1488  %2 = tail call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; Unpack the intrinsic result to the scalar i64 return value.
1489  %3 = bitcast <1 x i64> %2 to <2 x i32>
1490  %4 = bitcast <2 x i32> %3 to <1 x i64>
1491  %5 = extractelement <1 x i64> %4, i32 0
1492  ret i64 %5
1493}
1494
1495declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone
1496
; test53: checks the MSan instrumentation of @llvm.x86.mmx.pand.
; Both operands are round-tripped through <2 x i32> before the call.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%b at offset 8
;     first, then %a at offset 0);
;   * the two operand shadows are OR'd together into _MSPROP; no shadow copy
;     of the intrinsic call is emitted;
;   * the resulting shadow is stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1497define i64 @test53(<1 x i64> %a, <1 x i64> %b) #0 {
1498; CHECK-LABEL: define i64 @test53(
1499; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1500; CHECK-NEXT:  entry:
1501; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1502; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1503; CHECK-NEXT:    call void @llvm.donothing()
1504; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
1505; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
1506; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
1507; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
1508; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
1509; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
1510; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
1511; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
1512; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1513; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1514; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
1515; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
1516; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
1517; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
1518; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1519; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1520; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1521; CHECK-NEXT:    ret i64 [[TMP13]]
1522;
1523entry:
  ; %b is bitcast before %a, but %a is still passed as the first operand.
1524  %0 = bitcast <1 x i64> %b to <2 x i32>
1525  %1 = bitcast <1 x i64> %a to <2 x i32>
1526  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
1527  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
1528  %2 = tail call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; Unpack the intrinsic result to the scalar i64 return value.
1529  %3 = bitcast <1 x i64> %2 to <2 x i32>
1530  %4 = bitcast <2 x i32> %3 to <1 x i64>
1531  %5 = extractelement <1 x i64> %4, i32 0
1532  ret i64 %5
1533}
1534
1535declare <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64>, <1 x i64>) nounwind readnone
1536
; test52: checks the MSan instrumentation of @llvm.x86.mmx.pmull.w.
; Both operands are round-tripped through <4 x i16> before the call.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%b at offset 8
;     first, then %a at offset 0);
;   * the two operand shadows are OR'd together into _MSPROP; no shadow copy
;     of the intrinsic call is emitted;
;   * the resulting shadow is stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1537define i64 @test52(<1 x i64> %a, <1 x i64> %b) #0 {
1538; CHECK-LABEL: define i64 @test52(
1539; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1540; CHECK-NEXT:  entry:
1541; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1542; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1543; CHECK-NEXT:    call void @llvm.donothing()
1544; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
1545; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
1546; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
1547; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
1548; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
1549; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
1550; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
1551; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1552; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1553; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1554; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
1555; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
1556; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
1557; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
1558; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1559; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1560; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1561; CHECK-NEXT:    ret i64 [[TMP13]]
1562;
1563entry:
  ; %b is bitcast before %a, but %a is still passed as the first operand.
1564  %0 = bitcast <1 x i64> %b to <4 x i16>
1565  %1 = bitcast <1 x i64> %a to <4 x i16>
1566  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1567  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
1568  %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; Unpack the intrinsic result to the scalar i64 return value.
1569  %3 = bitcast <1 x i64> %2 to <4 x i16>
1570  %4 = bitcast <4 x i16> %3 to <1 x i64>
1571  %5 = extractelement <1 x i64> %4, i32 0
1572  ret i64 %5
1573}
1574
; test51: checks the MSan instrumentation of @llvm.x86.mmx.pmull.w.
; Both operands are round-tripped through <4 x i16> before the call.
; NOTE(review): apart from the function name, this test is identical to
; test52 (same intrinsic, same IR, same expected output). It looks like a
; duplicate carried over from an earlier test file -- confirm whether a
; different intrinsic was intended before changing it.
; Expected shadow handling, per the CHECK lines below:
;   * parameter shadows are loaded from @__msan_param_tls (%b at offset 8
;     first, then %a at offset 0);
;   * the two operand shadows are OR'd together into _MSPROP; no shadow copy
;     of the intrinsic call is emitted;
;   * the resulting shadow is stored to @__msan_retval_tls.
; The CHECK lines are autogenerated (utils/update_test_checks.py); regenerate
; them instead of editing by hand.
1575define i64 @test51(<1 x i64> %a, <1 x i64> %b) #0 {
1576; CHECK-LABEL: define i64 @test51(
1577; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1578; CHECK-NEXT:  entry:
1579; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1580; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1581; CHECK-NEXT:    call void @llvm.donothing()
1582; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
1583; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
1584; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
1585; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
1586; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
1587; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
1588; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
1589; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1590; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1591; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1592; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
1593; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
1594; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
1595; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
1596; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1597; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1598; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1599; CHECK-NEXT:    ret i64 [[TMP13]]
1600;
1601entry:
  ; %b is bitcast before %a, but %a is still passed as the first operand.
1602  %0 = bitcast <1 x i64> %b to <4 x i16>
1603  %1 = bitcast <1 x i64> %a to <4 x i16>
1604  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1605  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
1606  %2 = tail call <1 x i64> @llvm.x86.mmx.pmull.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
  ; Unpack the intrinsic result to the scalar i64 return value.
1607  %3 = bitcast <1 x i64> %2 to <4 x i16>
1608  %4 = bitcast <4 x i16> %3 to <1 x i64>
1609  %5 = extractelement <1 x i64> %4, i32 0
1610  ret i64 %5
1611}
1612
1613declare <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64>, <1 x i64>) nounwind readnone
1614
; test50: MemorySanitizer instrumentation of @llvm.x86.mmx.pmulh.w.
; The IR body views %a and %b as <4 x i16>, converts them back to <1 x i64>,
; calls the intrinsic and returns element 0 of the result as an i64.
; The autogenerated assertions below expect the two parameter shadows (loaded
; from @__msan_param_tls at offsets 0 and 8) to be OR-ed together into the
; result shadow, which is then stored to @__msan_retval_tls.
1615define i64 @test50(<1 x i64> %a, <1 x i64> %b) #0 {
1616; CHECK-LABEL: define i64 @test50(
1617; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1618; CHECK-NEXT:  entry:
1619; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1620; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1621; CHECK-NEXT:    call void @llvm.donothing()
1622; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
1623; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
1624; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
1625; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
1626; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
1627; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
1628; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
1629; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1630; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1631; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1632; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
1633; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
1634; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
1635; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
1636; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1637; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1638; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1639; CHECK-NEXT:    ret i64 [[TMP13]]
1640;
1641entry:
1642  %0 = bitcast <1 x i64> %b to <4 x i16>
1643  %1 = bitcast <1 x i64> %a to <4 x i16>
1644  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1645  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
1646  %2 = tail call <1 x i64> @llvm.x86.mmx.pmulh.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1647  %3 = bitcast <1 x i64> %2 to <4 x i16>
1648  %4 = bitcast <4 x i16> %3 to <1 x i64>
1649  %5 = extractelement <1 x i64> %4, i32 0
1650  ret i64 %5
1651}
1652
1653declare <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) nounwind readnone
1654
; test49: MemorySanitizer instrumentation of @llvm.x86.mmx.pmadd.wd.
; The IR body views %a and %b as <4 x i16>, converts them back to <1 x i64>,
; calls the intrinsic, views the result as <2 x i32> and returns element 0 of
; the <1 x i64> form as an i64.
; Unlike the plain OR propagation expected for the neighbouring tests, the
; autogenerated assertions below expect the OR-ed operand shadow to be viewed
; as <2 x i32>, compared lane-wise against zero and sign-extended, so each
; 32-bit lane of the result shadow is all-ones when any bit of that lane in
; the combined shadow is set. That value is stored to @__msan_retval_tls.
1655define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 {
1656; CHECK-LABEL: define i64 @test49(
1657; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1658; CHECK-NEXT:  entry:
1659; CHECK-NEXT:    [[TMP13:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1660; CHECK-NEXT:    [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1661; CHECK-NEXT:    call void @llvm.donothing()
1662; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
1663; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
1664; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
1665; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
1666; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
1667; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
1668; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
1669; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1670; CHECK-NEXT:    [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1671; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
1672; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer
1673; CHECK-NEXT:    [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32>
1674; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
1675; CHECK-NEXT:    [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1676; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
1677; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32>
1678; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64>
1679; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
1680; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0
1681; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
1682; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1683; CHECK-NEXT:    ret i64 [[TMP18]]
1684;
1685entry:
1686  %0 = bitcast <1 x i64> %b to <4 x i16>
1687  %1 = bitcast <1 x i64> %a to <4 x i16>
1688  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1689  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  ; Intrinsic under test: operands are viewed as <4 x i16> above, while the
  ; result is viewed as <2 x i32> below.
1690  %2 = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1691  %3 = bitcast <1 x i64> %2 to <2 x i32>
1692  %4 = bitcast <2 x i32> %3 to <1 x i64>
1693  %5 = extractelement <1 x i64> %4, i32 0
1694  ret i64 %5
1695}
1696
1697declare <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64>, <1 x i64>) nounwind readnone
1698
; test48: MemorySanitizer instrumentation of @llvm.x86.mmx.psubus.w.
; The IR body views %a and %b as <4 x i16>, converts them back to <1 x i64>,
; calls the intrinsic and returns element 0 of the result as an i64.
; The autogenerated assertions below expect the two parameter shadows (loaded
; from @__msan_param_tls at offsets 0 and 8) to be OR-ed together into the
; result shadow, which is then stored to @__msan_retval_tls.
1699define i64 @test48(<1 x i64> %a, <1 x i64> %b) #0 {
1700; CHECK-LABEL: define i64 @test48(
1701; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1702; CHECK-NEXT:  entry:
1703; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1704; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1705; CHECK-NEXT:    call void @llvm.donothing()
1706; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
1707; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
1708; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
1709; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
1710; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
1711; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
1712; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
1713; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1714; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1715; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1716; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
1717; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
1718; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
1719; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
1720; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1721; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1722; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1723; CHECK-NEXT:    ret i64 [[TMP13]]
1724;
1725entry:
1726  %0 = bitcast <1 x i64> %b to <4 x i16>
1727  %1 = bitcast <1 x i64> %a to <4 x i16>
1728  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1729  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
1730  %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1731  %3 = bitcast <1 x i64> %2 to <4 x i16>
1732  %4 = bitcast <4 x i16> %3 to <1 x i64>
1733  %5 = extractelement <1 x i64> %4, i32 0
1734  ret i64 %5
1735}
1736
1737declare <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64>, <1 x i64>) nounwind readnone
1738
; test47: MemorySanitizer instrumentation of @llvm.x86.mmx.psubus.b.
; The IR body views %a and %b as <8 x i8>, converts them back to <1 x i64>,
; calls the intrinsic and returns element 0 of the result as an i64.
; The autogenerated assertions below expect the two parameter shadows (loaded
; from @__msan_param_tls at offsets 0 and 8) to be OR-ed together into the
; result shadow, which is then stored to @__msan_retval_tls.
1739define i64 @test47(<1 x i64> %a, <1 x i64> %b) #0 {
1740; CHECK-LABEL: define i64 @test47(
1741; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1742; CHECK-NEXT:  entry:
1743; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1744; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1745; CHECK-NEXT:    call void @llvm.donothing()
1746; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
1747; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
1748; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
1749; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
1750; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
1751; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1752; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
1753; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1754; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1755; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1756; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
1757; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
1758; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
1759; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
1760; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1761; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1762; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1763; CHECK-NEXT:    ret i64 [[TMP13]]
1764;
1765entry:
1766  %0 = bitcast <1 x i64> %b to <8 x i8>
1767  %1 = bitcast <1 x i64> %a to <8 x i8>
1768  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
1769  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
1770  %2 = tail call <1 x i64> @llvm.x86.mmx.psubus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1771  %3 = bitcast <1 x i64> %2 to <8 x i8>
1772  %4 = bitcast <8 x i8> %3 to <1 x i64>
1773  %5 = extractelement <1 x i64> %4, i32 0
1774  ret i64 %5
1775}
1776
1777declare <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64>, <1 x i64>) nounwind readnone
1778
; test46: MemorySanitizer instrumentation of @llvm.x86.mmx.psubs.w.
; The IR body views %a and %b as <4 x i16>, converts them back to <1 x i64>,
; calls the intrinsic and returns element 0 of the result as an i64.
; The autogenerated assertions below expect the two parameter shadows (loaded
; from @__msan_param_tls at offsets 0 and 8) to be OR-ed together into the
; result shadow, which is then stored to @__msan_retval_tls.
1779define i64 @test46(<1 x i64> %a, <1 x i64> %b) #0 {
1780; CHECK-LABEL: define i64 @test46(
1781; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1782; CHECK-NEXT:  entry:
1783; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1784; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1785; CHECK-NEXT:    call void @llvm.donothing()
1786; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
1787; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
1788; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
1789; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
1790; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
1791; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
1792; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
1793; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1794; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1795; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1796; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
1797; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
1798; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
1799; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
1800; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1801; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1802; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1803; CHECK-NEXT:    ret i64 [[TMP13]]
1804;
1805entry:
1806  %0 = bitcast <1 x i64> %b to <4 x i16>
1807  %1 = bitcast <1 x i64> %a to <4 x i16>
1808  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1809  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
1810  %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1811  %3 = bitcast <1 x i64> %2 to <4 x i16>
1812  %4 = bitcast <4 x i16> %3 to <1 x i64>
1813  %5 = extractelement <1 x i64> %4, i32 0
1814  ret i64 %5
1815}
1816
1817declare <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64>, <1 x i64>) nounwind readnone
1818
; test45: MemorySanitizer instrumentation of @llvm.x86.mmx.psubs.b.
; The IR body views %a and %b as <8 x i8>, converts them back to <1 x i64>,
; calls the intrinsic and returns element 0 of the result as an i64.
; The autogenerated assertions below expect the two parameter shadows (loaded
; from @__msan_param_tls at offsets 0 and 8) to be OR-ed together into the
; result shadow, which is then stored to @__msan_retval_tls.
1819define i64 @test45(<1 x i64> %a, <1 x i64> %b) #0 {
1820; CHECK-LABEL: define i64 @test45(
1821; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1822; CHECK-NEXT:  entry:
1823; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1824; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1825; CHECK-NEXT:    call void @llvm.donothing()
1826; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
1827; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
1828; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
1829; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
1830; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
1831; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1832; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
1833; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1834; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1835; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1836; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
1837; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
1838; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
1839; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
1840; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1841; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1842; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1843; CHECK-NEXT:    ret i64 [[TMP13]]
1844;
1845entry:
1846  %0 = bitcast <1 x i64> %b to <8 x i8>
1847  %1 = bitcast <1 x i64> %a to <8 x i8>
1848  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
1849  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
1850  %2 = tail call <1 x i64> @llvm.x86.mmx.psubs.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1851  %3 = bitcast <1 x i64> %2 to <8 x i8>
1852  %4 = bitcast <8 x i8> %3 to <1 x i64>
1853  %5 = extractelement <1 x i64> %4, i32 0
1854  ret i64 %5
1855}
1856
; test44: MemorySanitizer instrumentation of @llvm.x86.mmx.psub.q.
; This test builds each operand differently from its neighbours: element 0 of
; %a / %b is extracted as an i64 and bitcast back to <1 x i64>, and the result
; is bitcast straight to i64 for the return. The call site carries no
; `nounwind`, and the expected call has no attribute-group reference.
; The autogenerated assertions below expect each parameter shadow (loaded from
; @__msan_param_tls at offsets 0 and 8) to follow the same extract/bitcast
; steps, be OR-ed together, bitcast to i64 and stored to @__msan_retval_tls.
1857define i64 @test44(<1 x i64> %a, <1 x i64> %b) #0 {
1858; CHECK-LABEL: define i64 @test44(
1859; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1860; CHECK-NEXT:  entry:
1861; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1862; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1863; CHECK-NEXT:    call void @llvm.donothing()
1864; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1865; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
1866; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
1867; CHECK-NEXT:    [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
1868; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
1869; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
1870; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
1871; CHECK-NEXT:    [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
1872; CHECK-NEXT:    [[_MSPROP3:%.*]] = or <1 x i64> [[TMP7]], [[TMP8]]
1873; CHECK-NEXT:    [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> [[MMX_VAR]], <1 x i64> [[MMX_VAR1]])
1874; CHECK-NEXT:    [[_MSPROP2:%.*]] = bitcast <1 x i64> [[_MSPROP3]] to i64
1875; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP6]] to i64
1876; CHECK-NEXT:    store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
1877; CHECK-NEXT:    ret i64 [[TMP3]]
1878;
1879entry:
1880  %0 = extractelement <1 x i64> %a, i32 0
1881  %mmx_var = bitcast i64 %0 to <1 x i64>
1882  %1 = extractelement <1 x i64> %b, i32 0
1883  %mmx_var1 = bitcast i64 %1 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
1884  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1)
1885  %3 = bitcast <1 x i64> %2 to i64
1886  ret i64 %3
1887}
1888
; Declaration of the intrinsic called by test44 above (declared after its use).
1889declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone
1890
1891declare <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64>, <1 x i64>) nounwind readnone
1892
; test43: MemorySanitizer instrumentation of @llvm.x86.mmx.psub.d.
; The IR body views %a and %b as <2 x i32>, converts them back to <1 x i64>,
; calls the intrinsic and returns element 0 of the result as an i64.
; The autogenerated assertions below expect the two parameter shadows (loaded
; from @__msan_param_tls at offsets 0 and 8) to be OR-ed together into the
; result shadow, which is then stored to @__msan_retval_tls.
1893define i64 @test43(<1 x i64> %a, <1 x i64> %b) #0 {
1894; CHECK-LABEL: define i64 @test43(
1895; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1896; CHECK-NEXT:  entry:
1897; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1898; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1899; CHECK-NEXT:    call void @llvm.donothing()
1900; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
1901; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
1902; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
1903; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
1904; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
1905; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
1906; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
1907; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
1908; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1909; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1910; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
1911; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
1912; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
1913; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
1914; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1915; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1916; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1917; CHECK-NEXT:    ret i64 [[TMP13]]
1918;
1919entry:
1920  %0 = bitcast <1 x i64> %b to <2 x i32>
1921  %1 = bitcast <1 x i64> %a to <2 x i32>
1922  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
1923  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
1924  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1925  %3 = bitcast <1 x i64> %2 to <2 x i32>
1926  %4 = bitcast <2 x i32> %3 to <1 x i64>
1927  %5 = extractelement <1 x i64> %4, i32 0
1928  ret i64 %5
1929}
1930
1931declare <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64>, <1 x i64>) nounwind readnone
1932
; test42: MemorySanitizer instrumentation of @llvm.x86.mmx.psub.w.
; The IR body views %a and %b as <4 x i16>, converts them back to <1 x i64>,
; calls the intrinsic and returns element 0 of the result as an i64.
; The autogenerated assertions below expect the two parameter shadows (loaded
; from @__msan_param_tls at offsets 0 and 8) to be OR-ed together into the
; result shadow, which is then stored to @__msan_retval_tls.
1933define i64 @test42(<1 x i64> %a, <1 x i64> %b) #0 {
1934; CHECK-LABEL: define i64 @test42(
1935; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1936; CHECK-NEXT:  entry:
1937; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1938; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1939; CHECK-NEXT:    call void @llvm.donothing()
1940; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
1941; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
1942; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
1943; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
1944; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
1945; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
1946; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
1947; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
1948; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1949; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1950; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
1951; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
1952; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
1953; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
1954; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1955; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1956; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1957; CHECK-NEXT:    ret i64 [[TMP13]]
1958;
1959entry:
1960  %0 = bitcast <1 x i64> %b to <4 x i16>
1961  %1 = bitcast <1 x i64> %a to <4 x i16>
1962  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
1963  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
1964  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
1965  %3 = bitcast <1 x i64> %2 to <4 x i16>
1966  %4 = bitcast <4 x i16> %3 to <1 x i64>
1967  %5 = extractelement <1 x i64> %4, i32 0
1968  ret i64 %5
1969}
1970
1971declare <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64>, <1 x i64>) nounwind readnone
1972
; test41: MemorySanitizer instrumentation of @llvm.x86.mmx.psub.b.
; The IR body views %a and %b as <8 x i8>, converts them back to <1 x i64>,
; calls the intrinsic and returns element 0 of the result as an i64.
; The autogenerated assertions below expect the two parameter shadows (loaded
; from @__msan_param_tls at offsets 0 and 8) to be OR-ed together into the
; result shadow, which is then stored to @__msan_retval_tls.
1973define i64 @test41(<1 x i64> %a, <1 x i64> %b) #0 {
1974; CHECK-LABEL: define i64 @test41(
1975; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
1976; CHECK-NEXT:  entry:
1977; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1978; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
1979; CHECK-NEXT:    call void @llvm.donothing()
1980; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
1981; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
1982; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
1983; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
1984; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
1985; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
1986; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
1987; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
1988; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
1989; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
1990; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
1991; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
1992; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
1993; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
1994; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
1995; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
1996; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
1997; CHECK-NEXT:    ret i64 [[TMP13]]
1998;
1999entry:
2000  %0 = bitcast <1 x i64> %b to <8 x i8>
2001  %1 = bitcast <1 x i64> %a to <8 x i8>
2002  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2003  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
2004  %2 = tail call <1 x i64> @llvm.x86.mmx.psub.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2005  %3 = bitcast <1 x i64> %2 to <8 x i8>
2006  %4 = bitcast <8 x i8> %3 to <1 x i64>
2007  %5 = extractelement <1 x i64> %4, i32 0
2008  ret i64 %5
2009}
2010
2011declare <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64>, <1 x i64>) nounwind readnone
2012
; test40: MemorySanitizer instrumentation of @llvm.x86.mmx.paddus.w.
; The IR body views %a and %b as <4 x i16>, converts them back to <1 x i64>,
; calls the intrinsic and returns element 0 of the result as an i64.
; The autogenerated assertions below expect the two parameter shadows (loaded
; from @__msan_param_tls at offsets 0 and 8) to be OR-ed together into the
; result shadow, which is then stored to @__msan_retval_tls.
2013define i64 @test40(<1 x i64> %a, <1 x i64> %b) #0 {
2014; CHECK-LABEL: define i64 @test40(
2015; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2016; CHECK-NEXT:  entry:
2017; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2018; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2019; CHECK-NEXT:    call void @llvm.donothing()
2020; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
2021; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
2022; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
2023; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
2024; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
2025; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
2026; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
2027; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2028; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2029; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2030; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
2031; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
2032; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
2033; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
2034; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2035; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2036; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2037; CHECK-NEXT:    ret i64 [[TMP13]]
2038;
2039entry:
2040  %0 = bitcast <1 x i64> %b to <4 x i16>
2041  %1 = bitcast <1 x i64> %a to <4 x i16>
2042  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2043  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
2044  %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2045  %3 = bitcast <1 x i64> %2 to <4 x i16>
2046  %4 = bitcast <4 x i16> %3 to <1 x i64>
2047  %5 = extractelement <1 x i64> %4, i32 0
2048  ret i64 %5
2049}
2050
2051declare <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64>, <1 x i64>) nounwind readnone
2052
; test39: MemorySanitizer instrumentation of @llvm.x86.mmx.paddus.b.
; The IR body views %a and %b as <8 x i8>, converts them back to <1 x i64>,
; calls the intrinsic and returns element 0 of the result as an i64.
; The autogenerated assertions below expect the two parameter shadows (loaded
; from @__msan_param_tls at offsets 0 and 8) to be OR-ed together into the
; result shadow, which is then stored to @__msan_retval_tls.
2053define i64 @test39(<1 x i64> %a, <1 x i64> %b) #0 {
2054; CHECK-LABEL: define i64 @test39(
2055; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2056; CHECK-NEXT:  entry:
2057; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2058; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2059; CHECK-NEXT:    call void @llvm.donothing()
2060; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
2061; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
2062; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
2063; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
2064; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
2065; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
2066; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
2067; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
2068; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2069; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2070; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
2071; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
2072; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
2073; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
2074; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2075; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2076; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2077; CHECK-NEXT:    ret i64 [[TMP13]]
2078;
2079entry:
2080  %0 = bitcast <1 x i64> %b to <8 x i8>
2081  %1 = bitcast <1 x i64> %a to <8 x i8>
2082  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2083  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
2084  %2 = tail call <1 x i64> @llvm.x86.mmx.paddus.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2085  %3 = bitcast <1 x i64> %2 to <8 x i8>
2086  %4 = bitcast <8 x i8> %3 to <1 x i64>
2087  %5 = extractelement <1 x i64> %4, i32 0
2088  ret i64 %5
2089}
2090
2091declare <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64>, <1 x i64>) nounwind readnone
2092
; test38: MemorySanitizer instrumentation of @llvm.x86.mmx.padds.w.
; The IR body views %a and %b as <4 x i16>, converts them back to <1 x i64>,
; calls the intrinsic and returns element 0 of the result as an i64.
; The autogenerated assertions below expect the two parameter shadows (loaded
; from @__msan_param_tls at offsets 0 and 8) to be OR-ed together into the
; result shadow, which is then stored to @__msan_retval_tls.
2093define i64 @test38(<1 x i64> %a, <1 x i64> %b) #0 {
2094; CHECK-LABEL: define i64 @test38(
2095; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2096; CHECK-NEXT:  entry:
2097; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2098; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2099; CHECK-NEXT:    call void @llvm.donothing()
2100; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
2101; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
2102; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
2103; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
2104; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
2105; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
2106; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
2107; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2108; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2109; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2110; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
2111; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
2112; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
2113; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
2114; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2115; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2116; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2117; CHECK-NEXT:    ret i64 [[TMP13]]
2118;
2119entry:
2120  %0 = bitcast <1 x i64> %b to <4 x i16>
2121  %1 = bitcast <1 x i64> %a to <4 x i16>
2122  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2123  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
  ; Intrinsic under test: first operand is derived from %a, second from %b.
2124  %2 = tail call <1 x i64> @llvm.x86.mmx.padds.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2125  %3 = bitcast <1 x i64> %2 to <4 x i16>
2126  %4 = bitcast <4 x i16> %3 to <1 x i64>
2127  %5 = extractelement <1 x i64> %4, i32 0
2128  ret i64 %5
2129}
2130
; test37: MSan instrumentation of @llvm.x86.mmx.padds.b (operands round-tripped
; through <8 x i8>). Expected output: the two argument shadows loaded from
; @__msan_param_tls are ORed into the result shadow, which is stored to
; @__msan_retval_tls; the intrinsic call itself is left in place.
2131declare <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64>, <1 x i64>) nounwind readnone
2132
2133define i64 @test37(<1 x i64> %a, <1 x i64> %b) #0 {
2134; CHECK-LABEL: define i64 @test37(
2135; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2136; CHECK-NEXT:  entry:
2137; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2138; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2139; CHECK-NEXT:    call void @llvm.donothing()
2140; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
2141; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
2142; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
2143; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
2144; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
2145; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
2146; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
2147; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
2148; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2149; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2150; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
2151; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
2152; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
2153; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
2154; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2155; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2156; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2157; CHECK-NEXT:    ret i64 [[TMP13]]
2158;
2159entry:
2160  %0 = bitcast <1 x i64> %b to <8 x i8>
2161  %1 = bitcast <1 x i64> %a to <8 x i8>
2162  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2163  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2164  %2 = tail call <1 x i64> @llvm.x86.mmx.padds.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2165  %3 = bitcast <1 x i64> %2 to <8 x i8>
2166  %4 = bitcast <8 x i8> %3 to <1 x i64>
2167  %5 = extractelement <1 x i64> %4, i32 0
2168  ret i64 %5
2169}
2170
; test36: MSan instrumentation of @llvm.x86.mmx.padd.q. Unlike the neighbouring
; tests, the operands are built with extractelement + bitcast from i64, and the
; call carries no call-site attribute (so the CHECK line has no #[[ATTR...]]).
; Expected output: each shadow follows the same extractelement/bitcast steps as
; its value, the two shadows are ORed, and the result shadow is bitcast to i64
; and stored to @__msan_retval_tls.
2171declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone
2172
2173define i64 @test36(<1 x i64> %a, <1 x i64> %b) #0 {
2174; CHECK-LABEL: define i64 @test36(
2175; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2176; CHECK-NEXT:  entry:
2177; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2178; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2179; CHECK-NEXT:    call void @llvm.donothing()
2180; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2181; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
2182; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
2183; CHECK-NEXT:    [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
2184; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP5]], i32 0
2185; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
2186; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
2187; CHECK-NEXT:    [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
2188; CHECK-NEXT:    [[_MSPROP3:%.*]] = or <1 x i64> [[TMP7]], [[TMP8]]
2189; CHECK-NEXT:    [[TMP6:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> [[MMX_VAR]], <1 x i64> [[MMX_VAR1]])
2190; CHECK-NEXT:    [[_MSPROP2:%.*]] = bitcast <1 x i64> [[_MSPROP3]] to i64
2191; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP6]] to i64
2192; CHECK-NEXT:    store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
2193; CHECK-NEXT:    ret i64 [[TMP3]]
2194;
2195entry:
2196  %0 = extractelement <1 x i64> %a, i32 0
2197  %mmx_var = bitcast i64 %0 to <1 x i64>
2198  %1 = extractelement <1 x i64> %b, i32 0
2199  %mmx_var1 = bitcast i64 %1 to <1 x i64>
2200  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %mmx_var, <1 x i64> %mmx_var1)
2201  %3 = bitcast <1 x i64> %2 to i64
2202  ret i64 %3
2203}
2204
; test35: MSan instrumentation of @llvm.x86.mmx.padd.d (operands round-tripped
; through <2 x i32>). Expected output: the two argument shadows loaded from
; @__msan_param_tls are ORed into the result shadow, which is stored to
; @__msan_retval_tls; the intrinsic call itself is left in place.
2205declare <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64>, <1 x i64>) nounwind readnone
2206
2207define i64 @test35(<1 x i64> %a, <1 x i64> %b) #0 {
2208; CHECK-LABEL: define i64 @test35(
2209; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2210; CHECK-NEXT:  entry:
2211; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2212; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2213; CHECK-NEXT:    call void @llvm.donothing()
2214; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
2215; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
2216; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
2217; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
2218; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP14]] to <1 x i64>
2219; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
2220; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
2221; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
2222; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2223; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2224; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
2225; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <2 x i32>
2226; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
2227; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP16]] to <1 x i64>
2228; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2229; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2230; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2231; CHECK-NEXT:    ret i64 [[TMP13]]
2232;
2233entry:
2234  %0 = bitcast <1 x i64> %b to <2 x i32>
2235  %1 = bitcast <1 x i64> %a to <2 x i32>
2236  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
2237  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
2238  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.d(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2239  %3 = bitcast <1 x i64> %2 to <2 x i32>
2240  %4 = bitcast <2 x i32> %3 to <1 x i64>
2241  %5 = extractelement <1 x i64> %4, i32 0
2242  ret i64 %5
2243}
2244
; test34: MSan instrumentation of @llvm.x86.mmx.padd.w (operands round-tripped
; through <4 x i16>). Expected output: the two argument shadows loaded from
; @__msan_param_tls are ORed into the result shadow, which is stored to
; @__msan_retval_tls; the intrinsic call itself is left in place.
2245declare <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64>, <1 x i64>) nounwind readnone
2246
2247define i64 @test34(<1 x i64> %a, <1 x i64> %b) #0 {
2248; CHECK-LABEL: define i64 @test34(
2249; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2250; CHECK-NEXT:  entry:
2251; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2252; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2253; CHECK-NEXT:    call void @llvm.donothing()
2254; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
2255; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
2256; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
2257; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
2258; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
2259; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
2260; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
2261; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2262; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2263; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2264; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
2265; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
2266; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
2267; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
2268; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2269; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2270; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2271; CHECK-NEXT:    ret i64 [[TMP13]]
2272;
2273entry:
2274  %0 = bitcast <1 x i64> %b to <4 x i16>
2275  %1 = bitcast <1 x i64> %a to <4 x i16>
2276  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2277  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2278  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2279  %3 = bitcast <1 x i64> %2 to <4 x i16>
2280  %4 = bitcast <4 x i16> %3 to <1 x i64>
2281  %5 = extractelement <1 x i64> %4, i32 0
2282  ret i64 %5
2283}
2284
; test33: MSan instrumentation of @llvm.x86.mmx.padd.b (operands round-tripped
; through <8 x i8>). Expected output: the two argument shadows loaded from
; @__msan_param_tls are ORed into the result shadow, which is stored to
; @__msan_retval_tls; the intrinsic call itself is left in place.
2285declare <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64>, <1 x i64>) nounwind readnone
2286
2287define i64 @test33(<1 x i64> %a, <1 x i64> %b) #0 {
2288; CHECK-LABEL: define i64 @test33(
2289; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2290; CHECK-NEXT:  entry:
2291; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2292; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2293; CHECK-NEXT:    call void @llvm.donothing()
2294; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
2295; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
2296; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
2297; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
2298; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
2299; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
2300; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
2301; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
2302; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2303; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2304; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
2305; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
2306; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
2307; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
2308; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2309; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2310; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2311; CHECK-NEXT:    ret i64 [[TMP13]]
2312;
2313entry:
2314  %0 = bitcast <1 x i64> %b to <8 x i8>
2315  %1 = bitcast <1 x i64> %a to <8 x i8>
2316  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2317  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2318  %2 = tail call <1 x i64> @llvm.x86.mmx.padd.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2319  %3 = bitcast <1 x i64> %2 to <8 x i8>
2320  %4 = bitcast <8 x i8> %3 to <1 x i64>
2321  %5 = extractelement <1 x i64> %4, i32 0
2322  ret i64 %5
2323}
2324
; test32: MSan instrumentation of @llvm.x86.mmx.psad.bw. The expected shadow is
; not a plain OR here: the ORed argument shadows are bitcast to i64, compared
; against zero, sign-extended to i64 and then logically shifted right by 48.
; So any poisoned input bit yields a result shadow of 0xFFFF (low 16 bits set),
; and a fully initialized input yields 0. That value is stored to
; @__msan_retval_tls.
2325declare <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64>, <1 x i64>) nounwind readnone
2326
2327define i64 @test32(<1 x i64> %a, <1 x i64> %b) #0 {
2328; CHECK-LABEL: define i64 @test32(
2329; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2330; CHECK-NEXT:  entry:
2331; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2332; CHECK-NEXT:    [[TMP12:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2333; CHECK-NEXT:    call void @llvm.donothing()
2334; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
2335; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
2336; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP12]] to <8 x i8>
2337; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
2338; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
2339; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
2340; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64>
2341; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
2342; CHECK-NEXT:    [[TMP16:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2343; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP16]] to i64
2344; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
2345; CHECK-NEXT:    [[TMP10:%.*]] = sext i1 [[TMP9]] to i64
2346; CHECK-NEXT:    [[TMP11:%.*]] = lshr i64 [[TMP10]], 48
2347; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i64 [[TMP11]] to <1 x i64>
2348; CHECK-NEXT:    [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2349; CHECK-NEXT:    [[TMP15:%.*]] = bitcast <1 x i64> [[TMP17]] to i64
2350; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP14]] to i64
2351; CHECK-NEXT:    store i64 [[TMP15]], ptr @__msan_retval_tls, align 8
2352; CHECK-NEXT:    ret i64 [[TMP3]]
2353;
2354entry:
2355  %0 = bitcast <1 x i64> %b to <8 x i8>
2356  %1 = bitcast <1 x i64> %a to <8 x i8>
2357  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2358  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2359  %2 = tail call <1 x i64> @llvm.x86.mmx.psad.bw(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2360  %3 = bitcast <1 x i64> %2 to i64
2361  ret i64 %3
2362}
2363
; test31: MSan instrumentation of @llvm.x86.mmx.pmins.w (operands round-tripped
; through <4 x i16>). Expected output: the two argument shadows loaded from
; @__msan_param_tls are ORed into the result shadow, which is stored to
; @__msan_retval_tls; the intrinsic call itself is left in place.
2364declare <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64>, <1 x i64>) nounwind readnone
2365
2366define i64 @test31(<1 x i64> %a, <1 x i64> %b) #0 {
2367; CHECK-LABEL: define i64 @test31(
2368; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2369; CHECK-NEXT:  entry:
2370; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2371; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2372; CHECK-NEXT:    call void @llvm.donothing()
2373; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
2374; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
2375; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
2376; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
2377; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
2378; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
2379; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
2380; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2381; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2382; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2383; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
2384; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
2385; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
2386; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
2387; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2388; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2389; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2390; CHECK-NEXT:    ret i64 [[TMP13]]
2391;
2392entry:
2393  %0 = bitcast <1 x i64> %b to <4 x i16>
2394  %1 = bitcast <1 x i64> %a to <4 x i16>
2395  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2396  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2397  %2 = tail call <1 x i64> @llvm.x86.mmx.pmins.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2398  %3 = bitcast <1 x i64> %2 to <4 x i16>
2399  %4 = bitcast <4 x i16> %3 to <1 x i64>
2400  %5 = extractelement <1 x i64> %4, i32 0
2401  ret i64 %5
2402}
2403
; test30: MSan instrumentation of @llvm.x86.mmx.pminu.b (operands round-tripped
; through <8 x i8>). Expected output: the two argument shadows loaded from
; @__msan_param_tls are ORed into the result shadow, which is stored to
; @__msan_retval_tls; the intrinsic call itself is left in place.
2404declare <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64>, <1 x i64>) nounwind readnone
2405
2406define i64 @test30(<1 x i64> %a, <1 x i64> %b) #0 {
2407; CHECK-LABEL: define i64 @test30(
2408; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2409; CHECK-NEXT:  entry:
2410; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2411; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2412; CHECK-NEXT:    call void @llvm.donothing()
2413; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
2414; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
2415; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
2416; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
2417; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
2418; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
2419; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
2420; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
2421; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2422; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2423; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
2424; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
2425; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
2426; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
2427; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2428; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2429; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2430; CHECK-NEXT:    ret i64 [[TMP13]]
2431;
2432entry:
2433  %0 = bitcast <1 x i64> %b to <8 x i8>
2434  %1 = bitcast <1 x i64> %a to <8 x i8>
2435  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2436  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2437  %2 = tail call <1 x i64> @llvm.x86.mmx.pminu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2438  %3 = bitcast <1 x i64> %2 to <8 x i8>
2439  %4 = bitcast <8 x i8> %3 to <1 x i64>
2440  %5 = extractelement <1 x i64> %4, i32 0
2441  ret i64 %5
2442}
2443
; test29: MSan instrumentation of @llvm.x86.mmx.pmaxs.w (operands round-tripped
; through <4 x i16>). Expected output: the two argument shadows loaded from
; @__msan_param_tls are ORed into the result shadow, which is stored to
; @__msan_retval_tls; the intrinsic call itself is left in place.
2444declare <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64>, <1 x i64>) nounwind readnone
2445
2446define i64 @test29(<1 x i64> %a, <1 x i64> %b) #0 {
2447; CHECK-LABEL: define i64 @test29(
2448; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2449; CHECK-NEXT:  entry:
2450; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2451; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2452; CHECK-NEXT:    call void @llvm.donothing()
2453; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
2454; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
2455; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
2456; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
2457; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
2458; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
2459; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
2460; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2461; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2462; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2463; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
2464; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
2465; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
2466; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
2467; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2468; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2469; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2470; CHECK-NEXT:    ret i64 [[TMP13]]
2471;
2472entry:
2473  %0 = bitcast <1 x i64> %b to <4 x i16>
2474  %1 = bitcast <1 x i64> %a to <4 x i16>
2475  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2476  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2477  %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxs.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2478  %3 = bitcast <1 x i64> %2 to <4 x i16>
2479  %4 = bitcast <4 x i16> %3 to <1 x i64>
2480  %5 = extractelement <1 x i64> %4, i32 0
2481  ret i64 %5
2482}
2483
; test28: MSan instrumentation of @llvm.x86.mmx.pmaxu.b (operands round-tripped
; through <8 x i8>). Expected output: the two argument shadows loaded from
; @__msan_param_tls are ORed into the result shadow, which is stored to
; @__msan_retval_tls; the intrinsic call itself is left in place.
2484declare <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64>, <1 x i64>) nounwind readnone
2485
2486define i64 @test28(<1 x i64> %a, <1 x i64> %b) #0 {
2487; CHECK-LABEL: define i64 @test28(
2488; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2489; CHECK-NEXT:  entry:
2490; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2491; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2492; CHECK-NEXT:    call void @llvm.donothing()
2493; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
2494; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
2495; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
2496; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
2497; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
2498; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
2499; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
2500; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
2501; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2502; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2503; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
2504; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
2505; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
2506; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
2507; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2508; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2509; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2510; CHECK-NEXT:    ret i64 [[TMP13]]
2511;
2512entry:
2513  %0 = bitcast <1 x i64> %b to <8 x i8>
2514  %1 = bitcast <1 x i64> %a to <8 x i8>
2515  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2516  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2517  %2 = tail call <1 x i64> @llvm.x86.mmx.pmaxu.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2518  %3 = bitcast <1 x i64> %2 to <8 x i8>
2519  %4 = bitcast <8 x i8> %3 to <1 x i64>
2520  %5 = extractelement <1 x i64> %4, i32 0
2521  ret i64 %5
2522}
2523
; test27: MSan instrumentation of @llvm.x86.mmx.pavg.w (operands round-tripped
; through <4 x i16>). Expected output: the two argument shadows loaded from
; @__msan_param_tls are ORed into the result shadow, which is stored to
; @__msan_retval_tls; the intrinsic call itself is left in place.
2524declare <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64>, <1 x i64>) nounwind readnone
2525
2526define i64 @test27(<1 x i64> %a, <1 x i64> %b) #0 {
2527; CHECK-LABEL: define i64 @test27(
2528; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2529; CHECK-NEXT:  entry:
2530; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2531; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2532; CHECK-NEXT:    call void @llvm.donothing()
2533; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
2534; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
2535; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
2536; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
2537; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
2538; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
2539; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
2540; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2541; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2542; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2543; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
2544; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
2545; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
2546; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
2547; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2548; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2549; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2550; CHECK-NEXT:    ret i64 [[TMP13]]
2551;
2552entry:
2553  %0 = bitcast <1 x i64> %b to <4 x i16>
2554  %1 = bitcast <1 x i64> %a to <4 x i16>
2555  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2556  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2557  %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2558  %3 = bitcast <1 x i64> %2 to <4 x i16>
2559  %4 = bitcast <4 x i16> %3 to <1 x i64>
2560  %5 = extractelement <1 x i64> %4, i32 0
2561  ret i64 %5
2562}
2563
; test26: MSan instrumentation of @llvm.x86.mmx.pavg.b (operands round-tripped
; through <8 x i8>). Expected output: the two argument shadows loaded from
; @__msan_param_tls are ORed into the result shadow, which is stored to
; @__msan_retval_tls; the intrinsic call itself is left in place.
2564declare <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64>, <1 x i64>) nounwind readnone
2565
2566define i64 @test26(<1 x i64> %a, <1 x i64> %b) #0 {
2567; CHECK-LABEL: define i64 @test26(
2568; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2569; CHECK-NEXT:  entry:
2570; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2571; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2572; CHECK-NEXT:    call void @llvm.donothing()
2573; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
2574; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
2575; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
2576; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
2577; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP14]] to <1 x i64>
2578; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
2579; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
2580; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
2581; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2582; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2583; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
2584; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
2585; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
2586; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
2587; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2588; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2589; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2590; CHECK-NEXT:    ret i64 [[TMP13]]
2591;
2592entry:
2593  %0 = bitcast <1 x i64> %b to <8 x i8>
2594  %1 = bitcast <1 x i64> %a to <8 x i8>
2595  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2596  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2597  %2 = tail call <1 x i64> @llvm.x86.mmx.pavg.b(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2598  %3 = bitcast <1 x i64> %2 to <8 x i8>
2599  %4 = bitcast <8 x i8> %3 to <1 x i64>
2600  %5 = extractelement <1 x i64> %4, i32 0
2601  ret i64 %5
2602}
2603
; test25: MSan instrumentation of the store intrinsic @llvm.x86.mmx.movnt.dq.
; Expected output:
;   * the shadow of %a is stored (align 1) to the address obtained by XORing
;     the integer value of %p with 87960930222080;
;   * the shadow of the pointer %p itself is compared against zero, and a
;     non-zero shadow branches to a block calling @__msan_warning_noreturn;
;   * otherwise the original intrinsic call is executed unchanged.
; The function returns void, so nothing is written to @__msan_retval_tls.
2604declare void @llvm.x86.mmx.movnt.dq(ptr, <1 x i64>) nounwind
2605
2606define void @test25(ptr %p, <1 x i64> %a) nounwind optsize ssp #0 {
2607; CHECK-LABEL: define void @test25(
2608; CHECK-SAME: ptr [[P:%.*]], <1 x i64> [[A:%.*]]) #[[ATTR3:[0-9]+]] {
2609; CHECK-NEXT:  entry:
2610; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2611; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
2612; CHECK-NEXT:    call void @llvm.donothing()
2613; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
2614; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
2615; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
2616; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
2617; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
2618; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
2619; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
2620; CHECK-NEXT:    store <1 x i64> [[TMP3]], ptr [[TMP6]], align 1
2621; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
2622; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1:![0-9]+]]
2623; CHECK:       7:
2624; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]]
2625; CHECK-NEXT:    unreachable
2626; CHECK:       8:
2627; CHECK-NEXT:    tail call void @llvm.x86.mmx.movnt.dq(ptr [[P]], <1 x i64> [[MMX_VAR_I]]) #[[ATTR2]]
2628; CHECK-NEXT:    ret void
2629;
2630entry:
2631  %0 = extractelement <1 x i64> %a, i32 0
2632  %mmx_var.i = bitcast i64 %0 to <1 x i64>
2633  tail call void @llvm.x86.mmx.movnt.dq(ptr %p, <1 x i64> %mmx_var.i) nounwind
2634  ret void
2635}
2636
; test24: MSan instrumentation of @llvm.x86.mmx.pmovmskb, which returns i32.
; Expected output: the operand shadow is checked rather than propagated. It is
; bitcast to i64 and compared against zero, and a non-zero shadow branches to a
; block calling @__msan_warning_noreturn. On the non-warning path the intrinsic
; is called and a constant 0 shadow is stored to @__msan_retval_tls.
2637declare i32 @llvm.x86.mmx.pmovmskb(<1 x i64>) nounwind readnone
2638
2639define i32 @test24(<1 x i64> %a) #0 {
2640; CHECK-LABEL: define i32 @test24(
2641; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
2642; CHECK-NEXT:  entry:
2643; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2644; CHECK-NEXT:    call void @llvm.donothing()
2645; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP2]] to <8 x i8>
2646; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
2647; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
2648; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
2649; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP4]] to i64
2650; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
2651; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2652; CHECK:       5:
2653; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
2654; CHECK-NEXT:    unreachable
2655; CHECK:       6:
2656; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> [[MMX_VAR_I]]) #[[ATTR2]]
2657; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
2658; CHECK-NEXT:    ret i32 [[TMP1]]
2659;
2660entry:
2661  %0 = bitcast <1 x i64> %a to <8 x i8>
2662  %mmx_var.i = bitcast <8 x i8> %0 to <1 x i64>
2663  %1 = tail call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> %mmx_var.i) nounwind
2664  ret i32 %1
2665}
2666
2667declare void @llvm.x86.mmx.maskmovq(<1 x i64>, <1 x i64>, ptr) nounwind
2668
; test23: MSan instrumentation of @llvm.x86.mmx.maskmovq.
; The assertions below expect three parameter shadows to be loaded from
; @__msan_param_tls (offsets 0, 8 and 16): the two <1 x i64> operands and the
; i64 shadow of the pointer %p. Each is compared with zero and the results are
; OR-ed together; a nonzero combined shadow branches to
; @__msan_warning_noreturn, otherwise the intrinsic is called unchanged.
2669define void @test23(<1 x i64> %d, <1 x i64> %n, ptr %p) nounwind optsize ssp #0 {
2670; CHECK-LABEL: define void @test23(
2671; CHECK-SAME: <1 x i64> [[D:%.*]], <1 x i64> [[N:%.*]], ptr [[P:%.*]]) #[[ATTR3]] {
2672; CHECK-NEXT:  entry:
2673; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2674; CHECK-NEXT:    [[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2675; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
2676; CHECK-NEXT:    call void @llvm.donothing()
2677; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
2678; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[N]] to <8 x i8>
2679; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
2680; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[D]] to <8 x i8>
2681; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
2682; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
2683; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x i8> [[TMP3]] to <1 x i64>
2684; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
2685; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <1 x i64> [[TMP9]] to i64
2686; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0
2687; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
2688; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0
2689; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2690; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
2691; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
2692; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
2693; CHECK:       11:
2694; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
2695; CHECK-NEXT:    unreachable
2696; CHECK:       12:
2697; CHECK-NEXT:    tail call void @llvm.x86.mmx.maskmovq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]], ptr [[P]]) #[[ATTR2]]
2698; CHECK-NEXT:    ret void
2699;
2700entry:
2701  %0 = bitcast <1 x i64> %n to <8 x i8>
2702  %1 = bitcast <1 x i64> %d to <8 x i8>
2703  %mmx_var.i = bitcast <8 x i8> %1 to <1 x i64>
2704  %mmx_var1.i = bitcast <8 x i8> %0 to <1 x i64>
2705  tail call void @llvm.x86.mmx.maskmovq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i, ptr %p) nounwind
2706  ret void
2707}
2708
2709declare <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64>, <1 x i64>) nounwind readnone
2710
; test22: MSan instrumentation of @llvm.x86.mmx.pmulhu.w.
; The assertions below expect no warning branch: the result shadow is the
; bitwise OR of the two operand shadows ([[_MSPROP]]), which is then passed
; through the same bitcast/extractelement chain as the real result and stored
; to @__msan_retval_tls.
2711define i64 @test22(<1 x i64> %a, <1 x i64> %b) #0 {
2712; CHECK-LABEL: define i64 @test22(
2713; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2714; CHECK-NEXT:  entry:
2715; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2716; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2717; CHECK-NEXT:    call void @llvm.donothing()
2718; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP8]] to <4 x i16>
2719; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
2720; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
2721; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
2722; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP14]] to <1 x i64>
2723; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
2724; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
2725; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2726; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2727; CHECK-NEXT:    [[TMP15:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2728; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
2729; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP15]] to <4 x i16>
2730; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP9]] to <1 x i64>
2731; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
2732; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2733; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <1 x i64> [[TMP12]], i32 0
2734; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
2735; CHECK-NEXT:    ret i64 [[TMP13]]
2736;
2737entry:
2738  %0 = bitcast <1 x i64> %b to <4 x i16>
2739  %1 = bitcast <1 x i64> %a to <4 x i16>
2740  %mmx_var.i = bitcast <4 x i16> %1 to <1 x i64>
2741  %mmx_var1.i = bitcast <4 x i16> %0 to <1 x i64>
2742  %2 = tail call <1 x i64> @llvm.x86.mmx.pmulhu.w(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2743  %3 = bitcast <1 x i64> %2 to <4 x i16>
2744  %4 = bitcast <4 x i16> %3 to <1 x i64>
2745  %5 = extractelement <1 x i64> %4, i32 0
2746  ret i64 %5
2747}
2748
2749declare <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64>, i8) nounwind readnone
2750
; test21: MSan instrumentation of @llvm.x86.sse.pshuf.w with immediate i8 3,
; returning the result as i64.
; The assertions below expect the operand shadow to be cast to i64 and
; compared with zero, with a nonzero shadow branching to
; @__msan_warning_noreturn. On the clean path the intrinsic is called and a
; constant i64 0 shadow is stored to @__msan_retval_tls.
2751define i64 @test21(<1 x i64> %a) #0 {
2752; CHECK-LABEL: define i64 @test21(
2753; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
2754; CHECK-NEXT:  entry:
2755; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2756; CHECK-NEXT:    call void @llvm.donothing()
2757; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
2758; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
2759; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
2760; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2761; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
2762; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
2763; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
2764; CHECK:       6:
2765; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
2766; CHECK-NEXT:    unreachable
2767; CHECK:       7:
2768; CHECK-NEXT:    [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
2769; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
2770; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <1 x i64>
2771; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
2772; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
2773; CHECK-NEXT:    ret i64 [[TMP5]]
2774;
2775entry:
2776  %0 = bitcast <1 x i64> %a to <4 x i16>
2777  %1 = bitcast <4 x i16> %0 to <1 x i64>
2778  %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone
2779  %3 = bitcast <1 x i64> %2 to <4 x i16>
2780  %4 = bitcast <4 x i16> %3 to <1 x i64>
2781  %5 = extractelement <1 x i64> %4, i32 0
2782  ret i64 %5
2783}
2784
; test21_2: variant of the @llvm.x86.sse.pshuf.w (immediate i8 3) test in which
; the intrinsic result is reinterpreted as <2 x i32> and element 0 is returned
; as i32.
; The assertions below expect the same zero-comparison of the operand shadow
; and branch to @__msan_warning_noreturn, followed by a constant i32 0 shadow
; stored to @__msan_retval_tls.
2785define i32 @test21_2(<1 x i64> %a) #0 {
2786; CHECK-LABEL: define i32 @test21_2(
2787; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
2788; CHECK-NEXT:  entry:
2789; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2790; CHECK-NEXT:    call void @llvm.donothing()
2791; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
2792; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
2793; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
2794; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
2795; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
2796; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
2797; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
2798; CHECK:       6:
2799; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
2800; CHECK-NEXT:    unreachable
2801; CHECK:       7:
2802; CHECK-NEXT:    [[TMP13:%.*]] = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> [[TMP11]], i8 3) #[[ATTR5]]
2803; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP13]] to <4 x i16>
2804; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <2 x i32>
2805; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
2806; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
2807; CHECK-NEXT:    ret i32 [[TMP5]]
2808;
2809entry:
2810  %0 = bitcast <1 x i64> %a to <4 x i16>
2811  %1 = bitcast <4 x i16> %0 to <1 x i64>
2812  %2 = tail call <1 x i64> @llvm.x86.sse.pshuf.w(<1 x i64> %1, i8 3) nounwind readnone
2813  %3 = bitcast <1 x i64> %2 to <4 x i16>
2814  %4 = bitcast <4 x i16> %3 to <2 x i32>
2815  %5 = extractelement <2 x i32> %4, i32 0
2816  ret i32 %5
2817}
2818
2819declare <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64>, <1 x i64>) nounwind readnone
2820
; test20: MSan instrumentation of @llvm.x86.mmx.pmulu.dq.
; The assertions below expect no warning branch: the result shadow is the
; bitwise OR of the two operand shadows ([[_MSPROP1]]), bitcast to i64 in
; parallel with the real result and stored to @__msan_retval_tls.
2821define i64 @test20(<1 x i64> %a, <1 x i64> %b) #0 {
2822; CHECK-LABEL: define i64 @test20(
2823; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2824; CHECK-NEXT:  entry:
2825; CHECK-NEXT:    [[TMP5:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2826; CHECK-NEXT:    [[TMP8:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2827; CHECK-NEXT:    call void @llvm.donothing()
2828; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP5]] to <2 x i32>
2829; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
2830; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
2831; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
2832; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP4]] to <1 x i64>
2833; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
2834; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i32> [[TMP9]] to <1 x i64>
2835; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
2836; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
2837; CHECK-NEXT:    [[TMP10:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
2838; CHECK-NEXT:    [[_MSPROP:%.*]] = bitcast <1 x i64> [[_MSPROP1]] to i64
2839; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
2840; CHECK-NEXT:    store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
2841; CHECK-NEXT:    ret i64 [[TMP3]]
2842;
2843entry:
2844  %0 = bitcast <1 x i64> %b to <2 x i32>
2845  %1 = bitcast <1 x i64> %a to <2 x i32>
2846  %mmx_var.i = bitcast <2 x i32> %1 to <1 x i64>
2847  %mmx_var1.i = bitcast <2 x i32> %0 to <1 x i64>
2848  %2 = tail call <1 x i64> @llvm.x86.mmx.pmulu.dq(<1 x i64> %mmx_var.i, <1 x i64> %mmx_var1.i) nounwind
2849  %3 = bitcast <1 x i64> %2 to i64
2850  ret i64 %3
2851}
2852
2853declare <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>) nounwind readnone
2854
; test19: MSan instrumentation of @llvm.x86.sse.cvtpi2pd.
; The assertions below expect the operand shadow to be cast to i64 and
; compared with zero, with a nonzero shadow branching to
; @__msan_warning_noreturn. On the clean path the intrinsic is called and a
; <2 x i64> zeroinitializer shadow is stored to @__msan_retval_tls.
2855define <2 x double> @test19(<1 x i64> %a) #0 {
2856; CHECK-LABEL: define <2 x double> @test19(
2857; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
2858; CHECK-NEXT:  entry:
2859; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2860; CHECK-NEXT:    call void @llvm.donothing()
2861; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <1 x i64> [[TMP4]] to <2 x i32>
2862; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
2863; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i32> [[TMP7]] to <1 x i64>
2864; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
2865; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
2866; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
2867; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2868; CHECK:       6:
2869; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
2870; CHECK-NEXT:    unreachable
2871; CHECK:       7:
2872; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> [[TMP8]]) #[[ATTR5]]
2873; CHECK-NEXT:    store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
2874; CHECK-NEXT:    ret <2 x double> [[TMP2]]
2875;
2876entry:
2877  %0 = bitcast <1 x i64> %a to <2 x i32>
2878  %1 = bitcast <2 x i32> %0 to <1 x i64>
2879  %2 = tail call <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64> %1) nounwind readnone
2880  ret <2 x double> %2
2881}
2882
2883declare <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
2884
; test18: MSan instrumentation of @llvm.x86.sse.cvttpd2pi.
; The assertions below expect the <2 x i64> shadow of %a to be bitcast to i128
; and compared with zero, with a nonzero shadow branching to
; @__msan_warning_noreturn. On the clean path the intrinsic is called and a
; constant i64 0 shadow is stored to @__msan_retval_tls.
2885define i64 @test18(<2 x double> %a) #0 {
2886; CHECK-LABEL: define i64 @test18(
2887; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] {
2888; CHECK-NEXT:  entry:
2889; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
2890; CHECK-NEXT:    call void @llvm.donothing()
2891; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
2892; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
2893; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2894; CHECK:       2:
2895; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
2896; CHECK-NEXT:    unreachable
2897; CHECK:       3:
2898; CHECK-NEXT:    [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> [[A]]) #[[ATTR5]]
2899; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
2900; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
2901; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
2902; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
2903; CHECK-NEXT:    ret i64 [[TMP3]]
2904;
2905entry:
2906  %0 = tail call <1 x i64> @llvm.x86.sse.cvttpd2pi(<2 x double> %a) nounwind readnone
2907  %1 = bitcast <1 x i64> %0 to <2 x i32>
2908  %2 = bitcast <2 x i32> %1 to <1 x i64>
2909  %3 = extractelement <1 x i64> %2, i32 0
2910  ret i64 %3
2911}
2912
2913declare <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
2914
; test17: MSan instrumentation of @llvm.x86.sse.cvtpd2pi.
; The assertions below expect the <2 x i64> shadow of %a to be bitcast to i128
; and compared with zero, with a nonzero shadow branching to
; @__msan_warning_noreturn. On the clean path the intrinsic is called and a
; constant i64 0 shadow is stored to @__msan_retval_tls.
2915define i64 @test17(<2 x double> %a) #0 {
2916; CHECK-LABEL: define i64 @test17(
2917; CHECK-SAME: <2 x double> [[A:%.*]]) #[[ATTR1]] {
2918; CHECK-NEXT:  entry:
2919; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
2920; CHECK-NEXT:    call void @llvm.donothing()
2921; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
2922; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
2923; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
2924; CHECK:       2:
2925; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
2926; CHECK-NEXT:    unreachable
2927; CHECK:       3:
2928; CHECK-NEXT:    [[TMP8:%.*]] = tail call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> [[A]]) #[[ATTR5]]
2929; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
2930; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
2931; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
2932; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
2933; CHECK-NEXT:    ret i64 [[TMP3]]
2934;
2935entry:
2936  %0 = tail call <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double> %a) nounwind readnone
2937  %1 = bitcast <1 x i64> %0 to <2 x i32>
2938  %2 = bitcast <2 x i32> %1 to <1 x i64>
2939  %3 = extractelement <1 x i64> %2, i32 0
2940  ret i64 %3
2941}
2942
2943declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone
2944
; test16: MSan instrumentation of @llvm.x86.mmx.palignr.b with immediate i8 16.
; The assertions below expect both operand shadows to be cast to i64, compared
; with zero and OR-ed; a nonzero combined shadow branches to
; @__msan_warning_noreturn. On the clean path the intrinsic is called and a
; constant i64 0 shadow is stored to @__msan_retval_tls.
2945define i64 @test16(<1 x i64> %a, <1 x i64> %b) #0 {
2946; CHECK-LABEL: define i64 @test16(
2947; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
2948; CHECK-NEXT:  entry:
2949; CHECK-NEXT:    [[TMP6:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2950; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
2951; CHECK-NEXT:    call void @llvm.donothing()
2952; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
2953; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <1 x i64> [[A]], i32 0
2954; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i64 [[_MSPROP]] to <1 x i64>
2955; CHECK-NEXT:    [[MMX_VAR:%.*]] = bitcast i64 [[TMP0]] to <1 x i64>
2956; CHECK-NEXT:    [[_MSPROP1:%.*]] = extractelement <1 x i64> [[TMP7]], i32 0
2957; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <1 x i64> [[B]], i32 0
2958; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i64 [[_MSPROP1]] to <1 x i64>
2959; CHECK-NEXT:    [[MMX_VAR1:%.*]] = bitcast i64 [[TMP1]] to <1 x i64>
2960; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP4]] to i64
2961; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP11]], 0
2962; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
2963; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP12]], 0
2964; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP2]]
2965; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2966; CHECK:       8:
2967; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
2968; CHECK-NEXT:    unreachable
2969; CHECK:       9:
2970; CHECK-NEXT:    [[TMP10:%.*]] = tail call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> [[MMX_VAR]], <1 x i64> [[MMX_VAR1]], i8 16)
2971; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP10]] to i64
2972; CHECK-NEXT:    store i64 0, ptr @__msan_retval_tls, align 8
2973; CHECK-NEXT:    ret i64 [[TMP3]]
2974;
2975entry:
2976  %0 = extractelement <1 x i64> %a, i32 0
2977  %mmx_var = bitcast i64 %0 to <1 x i64>
2978  %1 = extractelement <1 x i64> %b, i32 0
2979  %mmx_var1 = bitcast i64 %1 to <1 x i64>
2980  %2 = tail call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %mmx_var, <1 x i64> %mmx_var1, i8 16)
2981  %3 = bitcast <1 x i64> %2 to i64
2982  ret i64 %3
2983}
2984
2985declare <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64>) nounwind readnone
2986
; test15: MSan instrumentation of @llvm.x86.ssse3.pabs.d.
; The assertions below expect no warning branch and no extra shadow
; computation: the operand shadow is carried through the same bitcast and
; extractelement chain as the real value and stored to @__msan_retval_tls.
2987define i64 @test15(<1 x i64> %a) #0 {
2988; CHECK-LABEL: define i64 @test15(
2989; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
2990; CHECK-NEXT:  entry:
2991; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
2992; CHECK-NEXT:    call void @llvm.donothing()
2993; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <2 x i32>
2994; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
2995; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <2 x i32> [[TMP8]] to <1 x i64>
2996; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
2997; CHECK-NEXT:    [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> [[TMP1]]) #[[ATTR5]]
2998; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP11]] to <2 x i32>
2999; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
3000; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP6]] to <1 x i64>
3001; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64>
3002; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
3003; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0
3004; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
3005; CHECK-NEXT:    ret i64 [[TMP10]]
3006;
3007entry:
3008  %0 = bitcast <1 x i64> %a to <2 x i32>
3009  %1 = bitcast <2 x i32> %0 to <1 x i64>
3010  %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.d(<1 x i64> %1) nounwind readnone
3011  %3 = bitcast <1 x i64> %2 to <2 x i32>
3012  %4 = bitcast <2 x i32> %3 to <1 x i64>
3013  %5 = extractelement <1 x i64> %4, i32 0
3014  ret i64 %5
3015}
3016
3017declare <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64>) nounwind readnone
3018
; test14: MSan instrumentation of @llvm.x86.ssse3.pabs.w.
; The assertions below expect no warning branch and no extra shadow
; computation: the operand shadow is carried through the same bitcast and
; extractelement chain as the real value and stored to @__msan_retval_tls.
3019define i64 @test14(<1 x i64> %a) #0 {
3020; CHECK-LABEL: define i64 @test14(
3021; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
3022; CHECK-NEXT:  entry:
3023; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3024; CHECK-NEXT:    call void @llvm.donothing()
3025; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
3026; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
3027; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <4 x i16> [[TMP8]] to <1 x i64>
3028; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
3029; CHECK-NEXT:    [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> [[TMP1]]) #[[ATTR5]]
3030; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP11]] to <4 x i16>
3031; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP12]] to <4 x i16>
3032; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[TMP6]] to <1 x i64>
3033; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
3034; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
3035; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0
3036; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
3037; CHECK-NEXT:    ret i64 [[TMP10]]
3038;
3039entry:
3040  %0 = bitcast <1 x i64> %a to <4 x i16>
3041  %1 = bitcast <4 x i16> %0 to <1 x i64>
3042  %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.w(<1 x i64> %1) nounwind readnone
3043  %3 = bitcast <1 x i64> %2 to <4 x i16>
3044  %4 = bitcast <4 x i16> %3 to <1 x i64>
3045  %5 = extractelement <1 x i64> %4, i32 0
3046  ret i64 %5
3047}
3048
3049declare <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64>) nounwind readnone
3050
; test13: MSan instrumentation of @llvm.x86.ssse3.pabs.b.
; The assertions below expect no warning branch and no extra shadow
; computation: the operand shadow is carried through the same bitcast and
; extractelement chain as the real value and stored to @__msan_retval_tls.
3051define i64 @test13(<1 x i64> %a) #0 {
3052; CHECK-LABEL: define i64 @test13(
3053; CHECK-SAME: <1 x i64> [[A:%.*]]) #[[ATTR1]] {
3054; CHECK-NEXT:  entry:
3055; CHECK-NEXT:    [[TMP7:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3056; CHECK-NEXT:    call void @llvm.donothing()
3057; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
3058; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
3059; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
3060; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3061; CHECK-NEXT:    [[TMP12:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> [[TMP1]]) #[[ATTR5]]
3062; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <1 x i64> [[TMP11]] to <8 x i8>
3063; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP12]] to <8 x i8>
3064; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
3065; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64>
3066; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP4]], i32 0
3067; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <1 x i64> [[TMP9]], i32 0
3068; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
3069; CHECK-NEXT:    ret i64 [[TMP10]]
3070;
3071entry:
3072  %0 = bitcast <1 x i64> %a to <8 x i8>
3073  %1 = bitcast <8 x i8> %0 to <1 x i64>
3074  %2 = tail call <1 x i64> @llvm.x86.ssse3.pabs.b(<1 x i64> %1) nounwind readnone
3075  %3 = bitcast <1 x i64> %2 to <8 x i8>
3076  %4 = bitcast <8 x i8> %3 to <1 x i64>
3077  %5 = extractelement <1 x i64> %4, i32 0
3078  ret i64 %5
3079}
3080
3081declare <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64>, <1 x i64>) nounwind readnone
3082
; test12: MSan instrumentation of @llvm.x86.ssse3.psign.d.
; The assertions below expect no warning branch: the result shadow is the
; bitwise OR of the two operand shadows ([[_MSPROP]]), which is then passed
; through the same bitcast/extractelement chain as the real result and stored
; to @__msan_retval_tls.
3083define i64 @test12(<1 x i64> %a, <1 x i64> %b) #0 {
3084; CHECK-LABEL: define i64 @test12(
3085; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
3086; CHECK-NEXT:  entry:
3087; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3088; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3089; CHECK-NEXT:    call void @llvm.donothing()
3090; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
3091; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
3092; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
3093; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
3094; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64>
3095; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
3096; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64>
3097; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
3098; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
3099; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
3100; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
3101; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32>
3102; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
3103; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64>
3104; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
3105; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
3106; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
3107; CHECK-NEXT:    ret i64 [[TMP15]]
3108;
3109entry:
3110  %0 = bitcast <1 x i64> %b to <2 x i32>
3111  %1 = bitcast <1 x i64> %a to <2 x i32>
3112  %2 = bitcast <2 x i32> %1 to <1 x i64>
3113  %3 = bitcast <2 x i32> %0 to <1 x i64>
3114  %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3115  %5 = bitcast <1 x i64> %4 to <2 x i32>
3116  %6 = bitcast <2 x i32> %5 to <1 x i64>
3117  %7 = extractelement <1 x i64> %6, i32 0
3118  ret i64 %7
3119}
3120
3121declare <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64>, <1 x i64>) nounwind readnone
3122
; test11: MSan instrumentation of @llvm.x86.ssse3.psign.w.
; The assertions below expect no warning branch: the result shadow is the
; bitwise OR of the two operand shadows ([[_MSPROP]]), which is then passed
; through the same bitcast/extractelement chain as the real result and stored
; to @__msan_retval_tls.
3123define i64 @test11(<1 x i64> %a, <1 x i64> %b) #0 {
3124; CHECK-LABEL: define i64 @test11(
3125; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
3126; CHECK-NEXT:  entry:
3127; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3128; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3129; CHECK-NEXT:    call void @llvm.donothing()
3130; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
3131; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
3132; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
3133; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
3134; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
3135; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
3136; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
3137; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
3138; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
3139; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
3140; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
3141; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
3142; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
3143; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
3144; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
3145; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
3146; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
3147; CHECK-NEXT:    ret i64 [[TMP15]]
3148;
3149entry:
3150  %0 = bitcast <1 x i64> %b to <4 x i16>
3151  %1 = bitcast <1 x i64> %a to <4 x i16>
3152  %2 = bitcast <4 x i16> %1 to <1 x i64>
3153  %3 = bitcast <4 x i16> %0 to <1 x i64>
3154  %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3155  %5 = bitcast <1 x i64> %4 to <4 x i16>
3156  %6 = bitcast <4 x i16> %5 to <1 x i64>
3157  %7 = extractelement <1 x i64> %6, i32 0
3158  ret i64 %7
3159}
3160
3161declare <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64>, <1 x i64>) nounwind readnone
3162
; test10: MSan instrumentation of @llvm.x86.ssse3.psign.b.
; The assertions below expect no warning branch: the result shadow is the
; bitwise OR of the two operand shadows ([[_MSPROP]]), which is then passed
; through the same bitcast/extractelement chain as the real result and stored
; to @__msan_retval_tls.
3163define i64 @test10(<1 x i64> %a, <1 x i64> %b) #0 {
3164; CHECK-LABEL: define i64 @test10(
3165; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
3166; CHECK-NEXT:  entry:
3167; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3168; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3169; CHECK-NEXT:    call void @llvm.donothing()
3170; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
3171; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
3172; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
3173; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
3174; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64>
3175; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3176; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64>
3177; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3178; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
3179; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
3180; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
3181; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8>
3182; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
3183; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i8> [[TMP19]] to <1 x i64>
3184; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
3185; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
3186; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
3187; CHECK-NEXT:    ret i64 [[TMP15]]
3188;
3189entry:
3190  %0 = bitcast <1 x i64> %b to <8 x i8>
3191  %1 = bitcast <1 x i64> %a to <8 x i8>
3192  %2 = bitcast <8 x i8> %1 to <1 x i64>
3193  %3 = bitcast <8 x i8> %0 to <1 x i64>
3194  %4 = tail call <1 x i64> @llvm.x86.ssse3.psign.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3195  %5 = bitcast <1 x i64> %4 to <8 x i8>
3196  %6 = bitcast <8 x i8> %5 to <1 x i64>
3197  %7 = extractelement <1 x i64> %6, i32 0
3198  ret i64 %7
3199}
3200
3201declare <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64>, <1 x i64>) nounwind readnone
3202
; test9: MSan instrumentation of @llvm.x86.ssse3.pshuf.b.
; The assertions below expect no warning branch: the result shadow is the
; bitwise OR of the two operand shadows ([[_MSPROP]]), which is then passed
; through the same bitcast/extractelement chain as the real result and stored
; to @__msan_retval_tls.
3203define i64 @test9(<1 x i64> %a, <1 x i64> %b) #0 {
3204; CHECK-LABEL: define i64 @test9(
3205; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
3206; CHECK-NEXT:  entry:
3207; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3208; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3209; CHECK-NEXT:    call void @llvm.donothing()
3210; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
3211; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
3212; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
3213; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
3214; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <8 x i8> [[TMP13]] to <1 x i64>
3215; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3216; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP12]] to <1 x i64>
3217; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3218; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
3219; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
3220; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <8 x i8>
3221; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <8 x i8>
3222; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
3223; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i8> [[TMP19]] to <1 x i64>
3224; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
3225; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
3226; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
3227; CHECK-NEXT:    ret i64 [[TMP15]]
3228;
3229entry:
3230  %0 = bitcast <1 x i64> %b to <8 x i8>
3231  %1 = bitcast <1 x i64> %a to <8 x i8>
3232  %2 = bitcast <8 x i8> %1 to <1 x i64>
3233  %3 = bitcast <8 x i8> %0 to <1 x i64>
3234  %4 = tail call <1 x i64> @llvm.x86.ssse3.pshuf.b(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3235  %5 = bitcast <1 x i64> %4 to <8 x i8>
3236  %6 = bitcast <8 x i8> %5 to <1 x i64>
3237  %7 = extractelement <1 x i64> %6, i32 0
3238  ret i64 %7
3239}
3240
3241declare <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64>, <1 x i64>) nounwind readnone
3242
; test8 -- llvm.x86.ssse3.pmul.hr.sw on <1 x i64> operands under
; MemorySanitizer.
;
; Input IR: %a and %b are round-tripped through <4 x i16> bitcasts, handed to
; the intrinsic, and lane 0 of the (again round-tripped) result is returned.
;
; Expected output: the operand shadows are loaded from @__msan_param_tls
; (offset 0 pairs with %a, offset 8 pairs with %b), follow the same bitcast
; round trip, and are combined with a plain 'or' into the result shadow, whose
; lane 0 is stored to @__msan_retval_tls. No warning branch is expected.
3243define i64 @test8(<1 x i64> %a, <1 x i64> %b) #0 {
3244; CHECK-LABEL: define i64 @test8(
3245; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
3246; CHECK-NEXT:  entry:
3247; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3248; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3249; CHECK-NEXT:    call void @llvm.donothing()
3250; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
3251; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
3252; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
3253; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
3254; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
3255; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
3256; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
3257; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
3258; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP16]], [[TMP8]]
3259; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
3260; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
3261; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
3262; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
3263; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
3264; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
3265; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
3266; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
3267; CHECK-NEXT:    ret i64 [[TMP15]]
3268;
3269entry:
3270  %0 = bitcast <1 x i64> %b to <4 x i16>
3271  %1 = bitcast <1 x i64> %a to <4 x i16>
3272  %2 = bitcast <4 x i16> %1 to <1 x i64>
3273  %3 = bitcast <4 x i16> %0 to <1 x i64>
3274  %4 = tail call <1 x i64> @llvm.x86.ssse3.pmul.hr.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3275  %5 = bitcast <1 x i64> %4 to <4 x i16>
3276  %6 = bitcast <4 x i16> %5 to <1 x i64>
3277  %7 = extractelement <1 x i64> %6, i32 0
3278  ret i64 %7
3279}
3280
3281declare <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) nounwind readnone
3282
; test7 -- llvm.x86.ssse3.pmadd.ub.sw on <1 x i64> operands under
; MemorySanitizer.
;
; Input IR: %a and %b are round-tripped through <8 x i8> bitcasts, handed to
; the intrinsic, and lane 0 of the (again round-tripped) result is returned.
;
; Expected output: unlike the plain 'or' propagation seen for pshuf.b and
; pmul.hr.sw, the or-ed operand shadow is reinterpreted as <4 x i16>, each
; lane is compared against zero, and the <4 x i1> result is sign-extended
; back to <4 x i16>. Any set shadow bit in a 16-bit lane therefore turns that
; whole lane of the result shadow into all-ones. Lane 0 of the final
; <1 x i64> shadow is stored to @__msan_retval_tls. No warning branch is
; expected.
3283define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 {
3284; CHECK-LABEL: define i64 @test7(
3285; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
3286; CHECK-NEXT:  entry:
3287; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3288; CHECK-NEXT:    [[TMP15:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3289; CHECK-NEXT:    call void @llvm.donothing()
3290; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
3291; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8>
3292; CHECK-NEXT:    [[TMP18:%.*]] = bitcast <1 x i64> [[TMP15]] to <8 x i8>
3293; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8>
3294; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <8 x i8> [[TMP18]] to <1 x i64>
3295; CHECK-NEXT:    [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
3296; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64>
3297; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
3298; CHECK-NEXT:    [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]]
3299; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
3300; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer
3301; CHECK-NEXT:    [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16>
3302; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
3303; CHECK-NEXT:    [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]]
3304; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8>
3305; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8>
3306; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
3307; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
3308; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
3309; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0
3310; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
3311; CHECK-NEXT:    ret i64 [[TMP20]]
3312;
3313entry:
3314  %0 = bitcast <1 x i64> %b to <8 x i8>
3315  %1 = bitcast <1 x i64> %a to <8 x i8>
3316  %2 = bitcast <8 x i8> %1 to <1 x i64>
3317  %3 = bitcast <8 x i8> %0 to <1 x i64>
3318  %4 = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3319  %5 = bitcast <1 x i64> %4 to <8 x i8>
3320  %6 = bitcast <8 x i8> %5 to <1 x i64>
3321  %7 = extractelement <1 x i64> %6, i32 0
3322  ret i64 %7
3323}
3324
3325declare <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64>, <1 x i64>) nounwind readnone
3326
; test6 -- llvm.x86.ssse3.phsub.sw on <1 x i64> operands under
; MemorySanitizer.
;
; Input IR: %a and %b are round-tripped through <4 x i16> bitcasts, handed to
; the intrinsic, and lane 0 of the (again round-tripped) result is returned.
;
; Expected output: the result shadow is produced by calling
; @llvm.x86.ssse3.phadd.sw -- the horizontal *add* counterpart, not phsub.sw --
; on the two operand shadows (%a's shadow first, then %b's), while the
; application call to phsub.sw is kept as is. Lane 0 of the shadow is stored
; to @__msan_retval_tls. No warning branch is expected.
; NOTE(review): presumably the add form is used for the shadow of both the
; add and sub intrinsics on purpose -- confirm against the pass source.
3327define i64 @test6(<1 x i64> %a, <1 x i64> %b) #0 {
3328; CHECK-LABEL: define i64 @test6(
3329; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
3330; CHECK-NEXT:  entry:
3331; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3332; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3333; CHECK-NEXT:    call void @llvm.donothing()
3334; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
3335; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
3336; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
3337; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
3338; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
3339; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
3340; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
3341; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
3342; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
3343; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
3344; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
3345; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
3346; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
3347; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
3348; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
3349; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
3350; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
3351; CHECK-NEXT:    ret i64 [[TMP15]]
3352;
3353entry:
3354  %0 = bitcast <1 x i64> %b to <4 x i16>
3355  %1 = bitcast <1 x i64> %a to <4 x i16>
3356  %2 = bitcast <4 x i16> %1 to <1 x i64>
3357  %3 = bitcast <4 x i16> %0 to <1 x i64>
3358  %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3359  %5 = bitcast <1 x i64> %4 to <4 x i16>
3360  %6 = bitcast <4 x i16> %5 to <1 x i64>
3361  %7 = extractelement <1 x i64> %6, i32 0
3362  ret i64 %7
3363}
3364
3365declare <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64>, <1 x i64>) nounwind readnone
3366
; test5 -- llvm.x86.ssse3.phsub.d on <1 x i64> operands under
; MemorySanitizer.
;
; Input IR: %a and %b are round-tripped through <2 x i32> bitcasts, handed to
; the intrinsic, and lane 0 of the (again round-tripped) result is returned.
;
; Expected output: the result shadow is produced by calling
; @llvm.x86.ssse3.phadd.d -- the horizontal *add* counterpart, not phsub.d --
; on the two operand shadows (%a's shadow first, then %b's), while the
; application call to phsub.d is kept as is. Lane 0 of the shadow is stored
; to @__msan_retval_tls. No warning branch is expected.
; NOTE(review): presumably the add form is used for the shadow of both the
; add and sub intrinsics on purpose -- confirm against the pass source.
3367define i64 @test5(<1 x i64> %a, <1 x i64> %b) #0 {
3368; CHECK-LABEL: define i64 @test5(
3369; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
3370; CHECK-NEXT:  entry:
3371; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3372; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3373; CHECK-NEXT:    call void @llvm.donothing()
3374; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
3375; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
3376; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
3377; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
3378; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64>
3379; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
3380; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64>
3381; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
3382; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
3383; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
3384; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
3385; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32>
3386; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
3387; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64>
3388; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
3389; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
3390; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
3391; CHECK-NEXT:    ret i64 [[TMP15]]
3392;
3393entry:
3394  %0 = bitcast <1 x i64> %b to <2 x i32>
3395  %1 = bitcast <1 x i64> %a to <2 x i32>
3396  %2 = bitcast <2 x i32> %1 to <1 x i64>
3397  %3 = bitcast <2 x i32> %0 to <1 x i64>
3398  %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3399  %5 = bitcast <1 x i64> %4 to <2 x i32>
3400  %6 = bitcast <2 x i32> %5 to <1 x i64>
3401  %7 = extractelement <1 x i64> %6, i32 0
3402  ret i64 %7
3403}
3404
3405declare <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64>, <1 x i64>) nounwind readnone
3406
; test4 -- llvm.x86.ssse3.phsub.w on <1 x i64> operands under
; MemorySanitizer.
;
; Input IR: %a and %b are round-tripped through <4 x i16> bitcasts, handed to
; the intrinsic, and lane 0 of the (again round-tripped) result is returned.
;
; Expected output: the result shadow is produced by calling
; @llvm.x86.ssse3.phadd.w -- the horizontal *add* counterpart, not phsub.w --
; on the two operand shadows (%a's shadow first, then %b's), while the
; application call to phsub.w is kept as is. Lane 0 of the shadow is stored
; to @__msan_retval_tls. No warning branch is expected.
; NOTE(review): presumably the add form is used for the shadow of both the
; add and sub intrinsics on purpose -- confirm against the pass source.
3407define i64 @test4(<1 x i64> %a, <1 x i64> %b) #0 {
3408; CHECK-LABEL: define i64 @test4(
3409; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
3410; CHECK-NEXT:  entry:
3411; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3412; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3413; CHECK-NEXT:    call void @llvm.donothing()
3414; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
3415; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
3416; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
3417; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
3418; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
3419; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
3420; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
3421; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
3422; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.w(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
3423; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
3424; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
3425; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
3426; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
3427; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
3428; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
3429; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
3430; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
3431; CHECK-NEXT:    ret i64 [[TMP15]]
3432;
3433entry:
3434  %0 = bitcast <1 x i64> %b to <4 x i16>
3435  %1 = bitcast <1 x i64> %a to <4 x i16>
3436  %2 = bitcast <4 x i16> %1 to <1 x i64>
3437  %3 = bitcast <4 x i16> %0 to <1 x i64>
3438  %4 = tail call <1 x i64> @llvm.x86.ssse3.phsub.w(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3439  %5 = bitcast <1 x i64> %4 to <4 x i16>
3440  %6 = bitcast <4 x i16> %5 to <1 x i64>
3441  %7 = extractelement <1 x i64> %6, i32 0
3442  ret i64 %7
3443}
3444
3445declare <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64>, <1 x i64>) nounwind readnone
3446
; test3 -- llvm.x86.ssse3.phadd.sw on <1 x i64> operands under
; MemorySanitizer.
;
; Input IR: %a and %b are round-tripped through <4 x i16> bitcasts, handed to
; the intrinsic, and lane 0 of the (again round-tripped) result is returned.
;
; Expected output: the result shadow is produced by a second call to the same
; intrinsic, @llvm.x86.ssse3.phadd.sw, applied to the two operand shadows
; (%a's shadow first, then %b's); it precedes the application call. Lane 0 of
; the shadow is stored to @__msan_retval_tls. No warning branch is expected.
3447define i64 @test3(<1 x i64> %a, <1 x i64> %b) #0 {
3448; CHECK-LABEL: define i64 @test3(
3449; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
3450; CHECK-NEXT:  entry:
3451; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3452; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3453; CHECK-NEXT:    call void @llvm.donothing()
3454; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <4 x i16>
3455; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <4 x i16>
3456; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
3457; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <4 x i16>
3458; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
3459; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
3460; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <4 x i16> [[TMP12]] to <1 x i64>
3461; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
3462; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
3463; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
3464; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <4 x i16>
3465; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <4 x i16>
3466; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP11]] to <1 x i64>
3467; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP19]] to <1 x i64>
3468; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
3469; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
3470; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
3471; CHECK-NEXT:    ret i64 [[TMP15]]
3472;
3473entry:
3474  %0 = bitcast <1 x i64> %b to <4 x i16>
3475  %1 = bitcast <1 x i64> %a to <4 x i16>
3476  %2 = bitcast <4 x i16> %1 to <1 x i64>
3477  %3 = bitcast <4 x i16> %0 to <1 x i64>
3478  %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.sw(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3479  %5 = bitcast <1 x i64> %4 to <4 x i16>
3480  %6 = bitcast <4 x i16> %5 to <1 x i64>
3481  %7 = extractelement <1 x i64> %6, i32 0
3482  ret i64 %7
3483}
3484
3485declare <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64>, <1 x i64>) nounwind readnone
3486
; test2 -- llvm.x86.ssse3.phadd.d on <1 x i64> operands under
; MemorySanitizer.
;
; Input IR: %a and %b are round-tripped through <2 x i32> bitcasts, handed to
; the intrinsic, and lane 0 of the (again round-tripped) result is returned.
;
; Expected output: the result shadow is produced by a second call to the same
; intrinsic, @llvm.x86.ssse3.phadd.d, applied to the two operand shadows
; (%a's shadow first, then %b's); it precedes the application call. Lane 0 of
; the shadow is stored to @__msan_retval_tls. No warning branch is expected.
3487define i64 @test2(<1 x i64> %a, <1 x i64> %b) #0 {
3488; CHECK-LABEL: define i64 @test2(
3489; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR1]] {
3490; CHECK-NEXT:  entry:
3491; CHECK-NEXT:    [[TMP9:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3492; CHECK-NEXT:    [[TMP10:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3493; CHECK-NEXT:    call void @llvm.donothing()
3494; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP9]] to <2 x i32>
3495; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[B]] to <2 x i32>
3496; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP10]] to <2 x i32>
3497; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[A]] to <2 x i32>
3498; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <2 x i32> [[TMP13]] to <1 x i64>
3499; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <1 x i64>
3500; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <2 x i32> [[TMP12]] to <1 x i64>
3501; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <2 x i32> [[TMP0]] to <1 x i64>
3502; CHECK-NEXT:    [[_MSPROP:%.*]] = call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP16]], <1 x i64> [[TMP8]])
3503; CHECK-NEXT:    [[TMP18:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> [[TMP2]], <1 x i64> [[TMP17]]) #[[ATTR5]]
3504; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[_MSPROP]] to <2 x i32>
3505; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <1 x i64> [[TMP18]] to <2 x i32>
3506; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
3507; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i32> [[TMP19]] to <1 x i64>
3508; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
3509; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <1 x i64> [[TMP14]], i32 0
3510; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
3511; CHECK-NEXT:    ret i64 [[TMP15]]
3512;
3513entry:
3514  %0 = bitcast <1 x i64> %b to <2 x i32>
3515  %1 = bitcast <1 x i64> %a to <2 x i32>
3516  %2 = bitcast <2 x i32> %1 to <1 x i64>
3517  %3 = bitcast <2 x i32> %0 to <1 x i64>
3518  %4 = tail call <1 x i64> @llvm.x86.ssse3.phadd.d(<1 x i64> %2, <1 x i64> %3) nounwind readnone
3519  %5 = bitcast <1 x i64> %4 to <2 x i32>
3520  %6 = bitcast <2 x i32> %5 to <1 x i64>
3521  %7 = extractelement <1 x i64> %6, i32 0
3522  ret i64 %7
3523}
3524
; test89 -- llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) under
; MemorySanitizer.
;
; Expected output: no shadow is propagated through this intrinsic. The shadow
; of %a (<4 x i32> at offset 0 of @__msan_param_tls) and the shadow of %b
; (<1 x i64> at offset 16) are each collapsed to a scalar and compared against
; zero; if either is non-zero, control branches to a block that calls
; @__msan_warning_noreturn. Otherwise the original call runs unchanged and an
; all-zero shadow is stored to @__msan_retval_tls.
3525define <4 x float> @test89(<4 x float> %a, <1 x i64> %b) nounwind #0 {
3530; CHECK-LABEL: define <4 x float> @test89(
3531; CHECK-SAME: <4 x float> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR4:[0-9]+]] {
3532; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
3533; CHECK-NEXT:    [[TMP4:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
3534; CHECK-NEXT:    call void @llvm.donothing()
3535; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
3536; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
3537; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i64> [[TMP4]] to i64
3538; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
3539; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
3540; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
3541; CHECK:       5:
3542; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
3543; CHECK-NEXT:    unreachable
3544; CHECK:       6:
3545; CHECK-NEXT:    [[C:%.*]] = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> [[A]], <1 x i64> [[B]])
3546; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
3547; CHECK-NEXT:    ret <4 x float> [[C]]
3548;
3549  %c = tail call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a, <1 x i64> %b)
3550  ret <4 x float> %c
3551}
3552
3553declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>) nounwind readnone
3554
; test90 -- llvm.x86.mmx.emms (no operands, no result) under MemorySanitizer.
;
; Expected output: nothing is loaded from or stored to the MSan TLS slots; the
; only addition in front of the unchanged emms call is the
; call to @llvm.donothing.
3555define void @test90() #0 {
3560; CHECK-LABEL: define void @test90(
3561; CHECK-SAME: ) #[[ATTR1]] {
3562; CHECK-NEXT:    call void @llvm.donothing()
3563; CHECK-NEXT:    call void @llvm.x86.mmx.emms()
3564; CHECK-NEXT:    ret void
3565;
3566  call void @llvm.x86.mmx.emms()
3567  ret void
3568}
3569
3570declare void @llvm.x86.mmx.emms()
3571
; test_mm_insert_pi16 -- llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32 immarg)
; called with the constant 2 as its last operand, under MemorySanitizer.
;
; Expected output: no shadow is propagated through this intrinsic. The shadow
; of %a.coerce (<1 x i64> at offset 0 of @__msan_param_tls) and the shadow of
; %d (i32 at offset 8) are each compared against zero; if either is non-zero,
; control branches to a block that calls @__msan_warning_noreturn. Otherwise
; the original call runs unchanged and an all-zero <1 x i64> shadow is stored
; to @__msan_retval_tls.
3572define <1 x i64> @test_mm_insert_pi16(<1 x i64> %a.coerce, i32 %d) nounwind #0 {
3573; CHECK-LABEL: define <1 x i64> @test_mm_insert_pi16(
3574; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]], i32 [[D:%.*]]) #[[ATTR4]] {
3575; CHECK-NEXT:  entry:
3576; CHECK-NEXT:    [[TMP3:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3577; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
3578; CHECK-NEXT:    call void @llvm.donothing()
3579; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <1 x i64> [[TMP3]] to i64
3580; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0
3581; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i32 [[TMP6]], 0
3582; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
3583; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
3584; CHECK:       3:
3585; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
3586; CHECK-NEXT:    unreachable
3587; CHECK:       4:
3588; CHECK-NEXT:    [[TMP9:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> [[A_COERCE]], i32 [[D]], i32 2)
3589; CHECK-NEXT:    store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
3590; CHECK-NEXT:    ret <1 x i64> [[TMP9]]
3591;
3592entry:
3593  %1 = tail call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %a.coerce, i32 %d, i32 2)
3594  ret <1 x i64> %1
3595}
3596
3597declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32 immarg)
3598
; test_mm_extract_pi16 -- llvm.x86.mmx.pextr.w(<1 x i64>, i32 immarg) called
; with the constant 2 as its last operand, under MemorySanitizer.
;
; Expected output: no shadow is propagated through this intrinsic. The shadow
; of %a.coerce (<1 x i64> at offset 0 of @__msan_param_tls) is compared
; against zero; if non-zero, control branches to a block that calls
; @__msan_warning_noreturn. Otherwise the original call runs unchanged and a
; zero i32 shadow is stored to @__msan_retval_tls.
3599define i32 @test_mm_extract_pi16(<1 x i64> %a.coerce) nounwind #0 {
3600; CHECK-LABEL: define i32 @test_mm_extract_pi16(
3601; CHECK-SAME: <1 x i64> [[A_COERCE:%.*]]) #[[ATTR4]] {
3602; CHECK-NEXT:  entry:
3603; CHECK-NEXT:    [[TMP2:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
3604; CHECK-NEXT:    call void @llvm.donothing()
3605; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP2]] to i64
3606; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0
3607; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
3608; CHECK:       2:
3609; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
3610; CHECK-NEXT:    unreachable
3611; CHECK:       3:
3612; CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.x86.mmx.pextr.w(<1 x i64> [[A_COERCE]], i32 2)
3613; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
3614; CHECK-NEXT:    ret i32 [[TMP1]]
3615;
3616entry:
3617  %1 = tail call i32 @llvm.x86.mmx.pextr.w(<1 x i64> %a.coerce, i32 2)
3618  ret i32 %1
3619}
3620
3621declare i32 @llvm.x86.mmx.pextr.w(<1 x i64>, i32 immarg)
3622
; Attribute group #0 is the one referenced by the test functions' define
; lines. Per the LLVM Language Reference, sanitize_memory marks a function as
; having MemorySanitizer checks enabled.
; The section delimited by the ';.' markers below pins the metadata node that
; the conditional warning branches reference as PROF1 -- branch weights of 1
; versus 1048575.
3623attributes #0 = { sanitize_memory }
3624;.
3625; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
3626;.
3627