xref: /llvm-project/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c (revision de7c0068329d78027df7b7184d72646c1ca9f2bd)
1 // REQUIRES: powerpc-registered-target
2 
3 // RUN: %clang -S -emit-llvm -target powerpc64-gnu-linux -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
4 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
5 // RUN: %clang -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
6 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE
7 
8 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
9 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
10 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-freebsd13.0 -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
11 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE
12 
13 // RUN: %clang -S -emit-llvm -target powerpc64-ibm-aix -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
14 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
15 
16 // RUN: %clang -x c++ -S -emit-llvm -target powerpc64le-gnu-linux -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
17 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -fsyntax-only
18 
19 #include <tmmintrin.h>
20 
21 __m64 res, m1, m2;
22 __m128i resi, mi1, mi2;
23 
24 void __attribute__((noinline))
test_abs()25 test_abs() {
26   resi = _mm_abs_epi16(mi1);
27   resi = _mm_abs_epi32(mi1);
28   resi = _mm_abs_epi8(mi1);
29   res = _mm_abs_pi16(m1);
30   res = _mm_abs_pi32(m1);
31   res = _mm_abs_pi8(m1);
32 }
33 
34 // CHECK-LABEL: @test_abs
35 
36 // CHECK-LABEL: define available_externally <2 x i64> @_mm_abs_epi16
37 // CHECK: call <8 x i16> @vec_abs(short vector[8])
38 
39 // CHECK-LABEL: define available_externally <2 x i64> @_mm_abs_epi32
40 // CHECK: call <4 x i32> @vec_abs(int vector[4])
41 
42 // CHECK-LABEL: define available_externally <2 x i64> @_mm_abs_epi8
43 // CHECK: call <16 x i8> @vec_abs(signed char vector[16])
44 
45 // CHECK-LABEL: define available_externally i64 @_mm_abs_pi16
46 // CHECK: %[[ABS:[0-9a-zA-Z_.]+]] = call <8 x i16> @vec_abs(short vector[8])
47 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %[[ABS]] to <2 x i64>
48 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
49 
50 // CHECK-LABEL: define available_externally i64 @_mm_abs_pi32
51 // CHECK: %[[ABS:[0-9a-zA-Z_.]+]] = call <4 x i32> @vec_abs(int vector[4])
52 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <4 x i32> %[[ABS]] to <2 x i64>
53 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
54 
55 // CHECK-LABEL: define available_externally i64 @_mm_abs_pi8
56 // CHECK: %[[ABS:[0-9a-zA-Z_.]+]] = call <16 x i8> @vec_abs(signed char vector[16])
57 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %[[ABS]] to <2 x i64>
58 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
59 
60 void __attribute__((noinline))
test_alignr()61 test_alignr() {
62   resi = _mm_alignr_epi8(mi1, mi2, 1U);
63   res = _mm_alignr_pi8(m1, m2, 1U);
64 }
65 
66 // CHECK-LABEL: @test_alignr
67 
68 // CHECK-LABEL: define available_externally <2 x i64> @_mm_alignr_epi8
69 // CHECK: %[[CONST:[0-9a-zA-Z_.]+]] = call i1 @llvm.is.constant.i32(i32 %0)
70 // CHECK: br i1 %[[CONST]]
71 // CHECK-BE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)
72 // CHECK-LE: call <16 x i8> @vec_reve(unsigned char vector[16])
73 // CHECK-LE: call <16 x i8> @vec_reve(unsigned char vector[16])
74 // CHECK-LE: call <16 x i8> @vec_sld(unsigned char vector[16], unsigned char vector[16], unsigned int)
75 // CHECK-LE: call <16 x i8> @vec_reve(unsigned char vector[16])
76 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
77 // CHECK: store <2 x i64> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
78 // CHECK: %[[SUB:[0-9a-zA-Z_.]+]] = sub i32 %{{[0-9a-zA-Z_.]+}}, 16
79 // CHECK: %[[MUL:[0-9a-zA-Z_.]+]] = mul i32 %[[SUB]], 8
80 // CHECK: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL]] to i8
81 // CHECK: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]])
82 // CHECK-BE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
83 // CHECK-LE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
84 // CHECK: %[[SUB2:[0-9a-zA-Z_.]+]] = sub i32 16, %{{[0-9a-zA-Z_.]+}}
85 // CHECK: %[[MUL2:[0-9a-zA-Z_.]+]] = mul i32 %[[SUB2]], 8
86 // CHECK-BE: %[[TRUNC:[0-9a-zA-Z_.]+]] = trunc i32 %[[MUL2]] to i8
87 // CHECK-BE: call <16 x i8> @vec_splats(unsigned char)(i8 noundef zeroext %[[TRUNC]])
88 // CHECK-BE: mul i32 %{{[0-9a-zA-Z_.]+}}, 8
89 // CHECK-BE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
90 // CHECK-BE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
91 // CHECK-BE: call <16 x i8> @vec_or(unsigned char vector[16], unsigned char vector[16])
92 // CHECK-LE: %[[MUL3:[0-9a-zA-Z_.]+]] = mul i32 %{{[0-9a-zA-Z_.]+}}, 8
93 // CHECK-LE: trunc i32 %[[MUL3]] to i8
94 // CHECK-LE: call <16 x i8> @vec_splats(unsigned char)
95 // CHECK-LE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
96 // CHECK-LE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
97 // CHECK-LE: call <16 x i8> @vec_or(unsigned char vector[16], unsigned char vector[16])
98 
99 // CHECK-LABEL: define available_externally i64 @_mm_alignr_pi8
100 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp ult i32 %{{[0-9a-zA-Z_.]+}}, 16
101 // CHECK: br i1 %[[CMP]]
102 // CHECK-BE: call <16 x i8> @vec_slo(unsigned char vector[16], unsigned char vector[16])
103 // CHECK-LE: call <16 x i8> @vec_sro(unsigned char vector[16], unsigned char vector[16])
104 // CHECK: extractelement <2 x i64> %{{[0-9a-zA-Z_.]+}}, i32 0
105 // CHECK: store i64 0, ptr %{{[0-9a-zA-Z_.]+}}, align 8
106 // CHECK: store i64 0, ptr %{{[0-9a-zA-Z_.]+}}, align 8
107 
108 void __attribute__((noinline))
test_hadd()109 test_hadd() {
110   resi = _mm_hadd_epi16(mi1, mi2);
111   resi = _mm_hadd_epi32(mi1, mi2);
112   res = _mm_hadd_pi16(m1, m2);
113   res = _mm_hadd_pi32(m1, m2);
114   resi = _mm_hadds_epi16(mi1, mi2);
115   res = _mm_hadds_pi16(m1, m2);
116 }
117 
118 // CHECK-LABEL: @test_hadd
119 
120 // CHECK-LABEL: define available_externally <2 x i64> @_mm_hadd_epi16
121 // CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
122 // CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
123 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
124 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
125 // CHECK: call <8 x i16> @vec_add(short vector[8], short vector[8])
126 
127 // CHECK-LABEL: define available_externally <2 x i64> @_mm_hadd_epi32
128 // CHECK: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
129 // CHECK: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
130 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>)
131 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>)
132 // CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
133 
134 // CHECK-LABEL: define available_externally i64 @_mm_hadd_pi16
135 // CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
136 // CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
137 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>)
138 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>)
139 // CHECK: call <8 x i16> @vec_add(short vector[8], short vector[8])
140 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
141 // CHECK: extractelement <2 x i64> %[[CAST]], i32 1
142 
143 // CHECK-LABEL: define available_externally i64 @_mm_hadd_pi32
144 // CHECK: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
145 // CHECK: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
146 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>)
147 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>)
148 // CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
149 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <4 x i32> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
150 // CHECK: extractelement <2 x i64> %[[CAST]], i32 1
151 
152 // CHECK-LABEL: define available_externally <2 x i64> @_mm_hadds_epi16
153 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
154 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
155 // CHECK: call <4 x i32> @vec_sum4s(short vector[8], int vector[4])
156 // CHECK: call <4 x i32> @vec_sum4s(short vector[8], int vector[4])
157 // CHECK: call <8 x i16> @vec_packs(int vector[4], int vector[4])
158 
159 // CHECK-LABEL: define available_externally i64 @_mm_hadds_pi16
160 // CHECK: call <4 x i32> @vec_sum4s(short vector[8], int vector[4])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
161 // CHECK: call <8 x i16> @vec_packs(int vector[4], int vector[4])
162 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
163 // CHECK: extractelement <2 x i64> %[[CAST]], i32 1
164 
165 void __attribute__((noinline))
test_hsub()166 test_hsub() {
167   resi = _mm_hsub_epi16(mi1, mi2);
168   resi = _mm_hsub_epi32(mi1, mi2);
169   res = _mm_hsub_pi16(m1, m2);
170   res = _mm_hsub_pi32(m1, m2);
171   resi = _mm_hsubs_epi16(mi1, mi2);
172   res = _mm_hsubs_pi16(m1, m2);
173 }
174 
175 // CHECK-LABEL: @test_hsub
176 
177 // CHECK-LABEL: define available_externally <2 x i64> @_mm_hsub_epi16
178 // CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
179 // CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
180 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
181 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
182 // CHECK: call <8 x i16> @vec_sub(short vector[8], short vector[8])
183 
184 // CHECK-LABEL: define available_externally <2 x i64> @_mm_hsub_epi32
185 // CHECK: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
186 // CHECK: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
187 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 16, i8 17, i8 18, i8 19, i8 24, i8 25, i8 26, i8 27>)
188 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 20, i8 21, i8 22, i8 23, i8 28, i8 29, i8 30, i8 31>)
189 // CHECK: call <4 x i32> @vec_sub(int vector[4], int vector[4])
190 
191 // CHECK-LABEL: define available_externally i64 @_mm_hsub_pi16
192 // CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
193 // CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
194 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>)
195 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>)
196 // CHECK: call <8 x i16> @vec_sub(short vector[8], short vector[8])
197 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
198 // CHECK: extractelement <2 x i64> %[[CAST]], i32 1
199 
200 // CHECK-LABEL: define available_externally i64 @_mm_hsub_pi32
201 // CHECK: store <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
202 // CHECK: store <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
203 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15>)
204 // CHECK: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11, i8 0, i8 1, i8 2, i8 3, i8 8, i8 9, i8 10, i8 11>)
205 // CHECK: call <4 x i32> @vec_sub(int vector[4], int vector[4])
206 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <4 x i32> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
207 // CHECK: extractelement <2 x i64> %[[CAST]], i32 1
208 
209 // CHECK-LABEL: define available_externally <2 x i64> @_mm_hsubs_epi16
210 // CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
211 // CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
212 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
213 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
214 // CHECK: call <8 x i16> @vec_subs(short vector[8], short vector[8])
215 
216 // CHECK-LABEL: define available_externally i64 @_mm_hsubs_pi16
217 // CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
218 // CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
219 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13>)
220 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15>)
221 // CHECK: call <8 x i16> @vec_subs(short vector[8], short vector[8])
222 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
223 // CHECK: extractelement <2 x i64> %[[CAST]], i32 1
224 
225 void __attribute__((noinline))
test_shuffle()226 test_shuffle() {
227   resi = _mm_shuffle_epi8(mi1, mi2);
228   res = _mm_shuffle_pi8(m1, m2);
229 }
230 
231 // CHECK-LABEL: @test_shuffle
232 
233 // CHECK-LABEL: define available_externally <2 x i64> @_mm_shuffle_epi8
234 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
235 // CHECK: call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
236 // CHECK: call <16 x i8> @vec_perm(signed char vector[16], signed char vector[16], unsigned char vector[16])
237 // CHECK: call <16 x i8> @vec_sel(signed char vector[16], signed char vector[16], bool vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}})
238 
239 // CHECK-LABEL: define available_externally i64 @_mm_shuffle_pi8
240 // CHECK: call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
241 // CHECK: call <16 x i8> @vec_perm(signed char vector[16], signed char vector[16], unsigned char vector[16])
242 // CHECK: call <16 x i8> @vec_sel(signed char vector[16], signed char vector[16], bool vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer, <16 x i8> noundef %{{[0-9a-zA-Z_.]+}})
243 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
244 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
245 
246 void __attribute__((noinline))
test_sign()247 test_sign() {
248   resi = _mm_sign_epi8(mi1, mi2);
249   resi = _mm_sign_epi16(mi1, mi2);
250   resi = _mm_sign_epi32(mi1, mi2);
251   res = _mm_sign_pi8(m1, m2);
252   res = _mm_sign_pi16(m1, m2);
253   res = _mm_sign_pi32(m1, m2);
254 }
255 
256 // CHECK-LABEL: @test_sign
257 
258 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sign_epi8
259 // CHECK: call <16 x i8> @vec_cmplt(signed char vector[16], signed char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
260 // CHECK: call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])(<16 x i8> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef zeroinitializer)
261 // CHECK: call <16 x i8> @vec_neg(signed char vector[16])
262 // CHECK: call <16 x i8> @vec_add(signed char vector[16], signed char vector[16])
263 // CHECK: call <16 x i8> @vec_mul(signed char vector[16], signed char vector[16])
264 
265 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sign_epi16
266 // CHECK: call <8 x i16> @vec_cmplt(short vector[8], short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
267 // CHECK: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef zeroinitializer)
268 // CHECK: call <8 x i16> @vec_neg(short vector[8])
269 // CHECK: call <8 x i16> @vec_add(short vector[8], short vector[8])
270 // CHECK: call <8 x i16> @vec_mul(short vector[8], short vector[8])
271 
272 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sign_epi32
273 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
274 // CHECK: call <4 x i32> @vec_cmplt(int vector[4], int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
275 // CHECK: call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef zeroinitializer)
276 // CHECK: call <4 x i32> @vec_neg(int vector[4])
277 // CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
278 // CHECK: call <4 x i32> @vec_mul(int vector[4], int vector[4])
279 
280 // CHECK-LABEL: define available_externally i64 @_mm_sign_pi8
281 // CHECK: store <16 x i8> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
282 // CHECK: call <2 x i64> @_mm_sign_epi8
283 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <16 x i8> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
284 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
285 
286 // CHECK-LABEL: define available_externally i64 @_mm_sign_pi16
287 // CHECK: store <8 x i16> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
288 // CHECK: call <2 x i64> @_mm_sign_epi16
289 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
290 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
291 
292 // CHECK-LABEL: define available_externally i64 @_mm_sign_pi32
293 // CHECK: store <4 x i32> zeroinitializer, ptr %{{[0-9a-zA-Z_.]+}}, align 16
294 // CHECK: call <2 x i64> @_mm_sign_epi32
295 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <4 x i32> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
296 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
297 
298 void __attribute__((noinline))
test_maddubs()299 test_maddubs() {
300   resi = _mm_maddubs_epi16(mi1, mi2);
301   res = _mm_maddubs_pi16(m1, m2);
302 }
303 
304 // CHECK-LABEL: @test_maddubs
305 
306 // CHECK-LABEL: define available_externally <2 x i64> @_mm_maddubs_epi16
307 // CHECK: call <8 x i16> @vec_splats(short)(i16 noundef signext 255)
308 // CHECK: call <8 x i16> @vec_unpackh(signed char vector[16])
309 // CHECK: call <8 x i16> @vec_and(short vector[8], short vector[8])
310 // CHECK: call <8 x i16> @vec_unpackl(signed char vector[16])
311 // CHECK: call <8 x i16> @vec_and(short vector[8], short vector[8])
312 // CHECK: call <8 x i16> @vec_unpackh(signed char vector[16])
313 // CHECK: call <8 x i16> @vec_unpackl(signed char vector[16])
314 // CHECK: call <8 x i16> @vec_mul(short vector[8], short vector[8])
315 // CHECK: call <8 x i16> @vec_mul(short vector[8], short vector[8])
316 // CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
317 // CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
318 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
319 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
320 // CHECK: call <8 x i16> @vec_adds(short vector[8], short vector[8])
321 
322 // CHECK-LABEL: define available_externally i64 @_mm_maddubs_pi16
323 // CHECK: call <8 x i16> @vec_unpackl(signed char vector[16])
324 // CHECK: call <8 x i16> @vec_splats(short)(i16 noundef signext 255)
325 // CHECK: call <8 x i16> @vec_and(short vector[8], short vector[8])
326 // CHECK: call <8 x i16> @vec_unpackl(signed char vector[16])
327 // CHECK: call <8 x i16> @vec_mul(short vector[8], short vector[8])
328 // CHECK: store <16 x i8> <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
329 // CHECK: store <16 x i8> <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
330 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 0, i8 1, i8 4, i8 5, i8 8, i8 9, i8 12, i8 13, i8 16, i8 17, i8 20, i8 21, i8 24, i8 25, i8 28, i8 29>)
331 // CHECK: call <8 x i16> @vec_perm(short vector[8], short vector[8], unsigned char vector[16])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <16 x i8> noundef <i8 2, i8 3, i8 6, i8 7, i8 10, i8 11, i8 14, i8 15, i8 18, i8 19, i8 22, i8 23, i8 26, i8 27, i8 30, i8 31>)
332 // CHECK: call <8 x i16> @vec_adds(short vector[8], short vector[8])
333 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
334 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
335 
336 void __attribute__((noinline))
test_mulhrs()337 test_mulhrs() {
338   resi = _mm_mulhrs_epi16(mi1, mi2);
339   res = _mm_mulhrs_pi16(m1, m2);
340 }
341 
342 // CHECK-LABEL: @test_mulhrs
343 
344 // CHECK-LABEL: define available_externally <2 x i64> @_mm_mulhrs_epi16
345 // CHECK: call <4 x i32> @vec_unpackh(short vector[8])
346 // CHECK: call <4 x i32> @vec_unpackh(short vector[8])
347 // CHECK: call <4 x i32> @vec_mul(int vector[4], int vector[4])
348 // CHECK: call <4 x i32> @vec_unpackl(short vector[8])
349 // CHECK: call <4 x i32> @vec_unpackl(short vector[8])
350 // CHECK: call <4 x i32> @vec_mul(int vector[4], int vector[4])
351 // CHECK: call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext 14)
352 // CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
353 // CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
354 // CHECK: call <4 x i32> @vec_splats(int)(i32 noundef signext 1)
355 // CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
356 // CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
357 // CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
358 // CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
359 // CHECK: %[[PACK:[0-9a-zA-Z_.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])
360 
361 // CHECK-LABEL: define available_externally i64 @_mm_mulhrs_pi16
362 // CHECK: call <4 x i32> @vec_unpackh(short vector[8])
363 // CHECK: call <4 x i32> @vec_unpackh(short vector[8])
364 // CHECK: call <4 x i32> @vec_mul(int vector[4], int vector[4])
365 // CHECK: call <4 x i32> @vec_splats(unsigned int)(i32 noundef zeroext 14)
366 // CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
367 // CHECK: call <4 x i32> @vec_splats(int)(i32 noundef signext 1)
368 // CHECK: call <4 x i32> @vec_add(int vector[4], int vector[4])
369 // CHECK: call <4 x i32> @vec_sr(int vector[4], unsigned int vector[4])
370 // CHECK: call <8 x i16> @vec_pack(int vector[4], int vector[4])
371 // CHECK: %[[CAST:[0-9a-zA-Z_.]+]] = bitcast <8 x i16> %{{[0-9a-zA-Z_.]+}} to <2 x i64>
372 // CHECK: extractelement <2 x i64> %[[CAST]], i32 0
373