; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2

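; Signed division by a splat power-of-two constant needs no real division.
; For lanes of B bits and a divisor of 2^k, the backend builds a rounding
; bias and shifts:
;   bias = (x >>s (B-1)) >>u (B-k)   (2^k-1 for negative lanes, else 0)
;   res  = (x + bias) >>s k
; The bias makes the arithmetic shift round toward zero, matching sdiv
; semantics. Each test below checks one instance of this expansion.

; <8 x i16> divided by splat 32 = 2^5: psraw $15 / psrlw $11 builds the
; bias, paddw applies it, and psraw $5 performs the division.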
define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
; SSE-LABEL: sdiv_vec8x16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    psrlw $11, %xmm1
; SSE-NEXT:    paddw %xmm1, %xmm0
; SSE-NEXT:    psraw $5, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec8x16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

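; The same expansion is used even when optimizing for minimum size (minsize).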
define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
; SSE-LABEL: sdiv_vec8x16_minsize:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    psrlw $11, %xmm1
; SSE-NEXT:    paddw %xmm1, %xmm0
; SSE-NEXT:    psraw $5, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec8x16_minsize:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX-NEXT:    vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $5, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

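; <4 x i32> divided by splat 16 = 2^4: the same pattern with dword shifts,
; B-1 = 31 and B-k = 28.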
define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
; SSE-LABEL: sdiv_vec4x32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $28, %xmm1
; SSE-NEXT:    paddd %xmm1, %xmm0
; SSE-NEXT:    psrad $4, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_vec4x32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %0
}

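; A negative divisor, splat -16: divide by 16 as above, then negate the
; result by subtracting it from zero.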
define <4 x i32> @sdiv_negative(<4 x i32> %var) {
; SSE-LABEL: sdiv_negative:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    psrld $28, %xmm1
; SSE-NEXT:    paddd %xmm0, %xmm1
; SSE-NEXT:    psrad $4, %xmm1
; SSE-NEXT:    pxor %xmm0, %xmm0
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_negative:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX-NEXT:    vpsrld $28, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $4, %xmm0, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
  ret <4 x i32> %0
}

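; 256-bit vectors, divisor 64 = 2^6: AVX1 has no 256-bit integer shifts, so
; the <8 x i32> is split into two 128-bit halves that are recombined with
; vinsertf128; AVX2 performs the whole expansion on ymm registers.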
define <8 x i32> @sdiv8x32(<8 x i32> %var) {
; SSE-LABEL: sdiv8x32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    psrld $26, %xmm2
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    psrad $6, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    psrld $26, %xmm2
; SSE-NEXT:    paddd %xmm2, %xmm1
; SSE-NEXT:    psrad $6, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sdiv8x32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vpsrld $26, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsrad $6, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT:    vpsrld $26, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsrad $6, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sdiv8x32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT:    vpsrld $26, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $6, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
  ret <8 x i32> %0
}

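; <16 x i16> divided by splat 4 = 2^2: the same AVX1 split versus AVX2 ymm
; contrast, with word shifts (psraw $15 / psrlw $14 / psraw $2).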
define <16 x i16> @sdiv16x16(<16 x i16> %var) {
; SSE-LABEL: sdiv16x16:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    psraw $15, %xmm2
; SSE-NEXT:    psrlw $14, %xmm2
; SSE-NEXT:    paddw %xmm2, %xmm0
; SSE-NEXT:    psraw $2, %xmm0
; SSE-NEXT:    movdqa %xmm1, %xmm2
; SSE-NEXT:    psraw $15, %xmm2
; SSE-NEXT:    psrlw $14, %xmm2
; SSE-NEXT:    paddw %xmm2, %xmm1
; SSE-NEXT:    psraw $2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sdiv16x16:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT:    vpsrlw $14, %xmm1, %xmm1
; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm1
; AVX1-NEXT:    vpsraw $2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT:    vpsrlw $14, %xmm2, %xmm2
; AVX1-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sdiv16x16:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsraw $15, %ymm0, %ymm1
; AVX2-NEXT:    vpsrlw $14, %ymm1, %ymm1
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsraw $2, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  ret <16 x i16> %a0
}

; A non-splat divisor is not expanded into shifts. Div-by-0 in any lane is
; UB, so the backend may fold the division away entirely and return the
; input unchanged.

define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
; SSE-LABEL: sdiv_non_splat:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_non_splat:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
  ret <4 x i32> %y
}