xref: /llvm-project/llvm/test/CodeGen/X86/logic-shift.ll (revision 86eff6be686a1e41e13c08ebfc2db4dd4d58e7c6)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s
3
; i8 scalar: `or` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(sh1, z); r = or(logic, sh2).
; The expected asm ORs %x0 with %x1 first and then shifts only once (shrb).
4define i8 @or_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
5; CHECK-LABEL: or_lshr_commute0:
6; CHECK:       # %bb.0:
7; CHECK-NEXT:    movl %ecx, %eax
8; CHECK-NEXT:    movl %edx, %ecx
9; CHECK-NEXT:    orl %esi, %edi
10; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
11; CHECK-NEXT:    shrb %cl, %dil
12; CHECK-NEXT:    orb %dil, %al
13; CHECK-NEXT:    # kill: def $al killed $al killed $eax
14; CHECK-NEXT:    retq
15  %sh1 = lshr i8 %x0, %y
16  %sh2 = lshr i8 %x1, %y
17  %logic = or i8 %sh1, %z
18  %r = or i8 %logic, %sh2
19  ret i8 %r
20}
21
; i32 scalar: `or` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(z, sh1); r = or(logic, sh2).
; The expected asm contains a single shrl applied to (%x0 | %x1).
22define i32 @or_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
23; CHECK-LABEL: or_lshr_commute1:
24; CHECK:       # %bb.0:
25; CHECK-NEXT:    movl %ecx, %eax
26; CHECK-NEXT:    movl %edx, %ecx
27; CHECK-NEXT:    orl %esi, %edi
28; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
29; CHECK-NEXT:    shrl %cl, %edi
30; CHECK-NEXT:    orl %edi, %eax
31; CHECK-NEXT:    retq
32  %sh1 = lshr i32 %x0, %y
33  %sh2 = lshr i32 %x1, %y
34  %logic = or i32 %z, %sh1
35  %r = or i32 %logic, %sh2
36  ret i32 %r
37}
38
; <8 x i16> vector: `or` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(sh1, z); r = or(sh2, logic).
; The expected asm ORs the inputs first; the one remaining shift is done on
; zero-extended 32-bit lanes (vpmovzxwd + vpsrlvd) and packed back with vpackusdw.
39define <8 x i16> @or_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
40; CHECK-LABEL: or_lshr_commute2:
41; CHECK:       # %bb.0:
42; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
43; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
44; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
45; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
46; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
47; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
48; CHECK-NEXT:    vpor %xmm3, %xmm0, %xmm0
49; CHECK-NEXT:    vzeroupper
50; CHECK-NEXT:    retq
51  %sh1 = lshr <8 x i16> %x0, %y
52  %sh2 = lshr <8 x i16> %x1, %y
53  %logic = or <8 x i16> %sh1, %z
54  %r = or <8 x i16> %sh2, %logic
55  ret <8 x i16> %r
56}
57
; <2 x i64> vector: `or` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(z, sh1); r = or(sh2, logic).
; The expected asm is vpor, one vpsrlvq, vpor.
58define <2 x i64> @or_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
59; CHECK-LABEL: or_lshr_commute3:
60; CHECK:       # %bb.0:
61; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
62; CHECK-NEXT:    vpsrlvq %xmm2, %xmm0, %xmm0
63; CHECK-NEXT:    vpor %xmm3, %xmm0, %xmm0
64; CHECK-NEXT:    retq
65  %sh1 = lshr <2 x i64> %x0, %y
66  %sh2 = lshr <2 x i64> %x1, %y
67  %logic = or <2 x i64> %z, %sh1
68  %r = or <2 x i64> %sh2, %logic
69  ret <2 x i64> %r
70}
71
; i16 scalar: `or` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(sh1, z); r = or(logic, sh2).
; The expected asm ORs the inputs, sign-extends the 16-bit result (movswl) and
; performs one 32-bit sarl.
72define i16 @or_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
73; CHECK-LABEL: or_ashr_commute0:
74; CHECK:       # %bb.0:
75; CHECK-NEXT:    movl %ecx, %r8d
76; CHECK-NEXT:    movl %edx, %ecx
77; CHECK-NEXT:    orl %esi, %edi
78; CHECK-NEXT:    movswl %di, %eax
79; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
80; CHECK-NEXT:    sarl %cl, %eax
81; CHECK-NEXT:    orl %r8d, %eax
82; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
83; CHECK-NEXT:    retq
84  %sh1 = ashr i16 %x0, %y
85  %sh2 = ashr i16 %x1, %y
86  %logic = or i16 %sh1, %z
87  %r = or i16 %logic, %sh2
88  ret i16 %r
89}
90
; i64 scalar: `or` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(z, sh1); r = or(logic, sh2).
; The expected asm contains a single sarq applied to (%x0 | %x1).
91define i64 @or_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
92; CHECK-LABEL: or_ashr_commute1:
93; CHECK:       # %bb.0:
94; CHECK-NEXT:    movq %rcx, %rax
95; CHECK-NEXT:    movq %rdx, %rcx
96; CHECK-NEXT:    orq %rsi, %rdi
97; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
98; CHECK-NEXT:    sarq %cl, %rdi
99; CHECK-NEXT:    orq %rdi, %rax
100; CHECK-NEXT:    retq
101  %sh1 = ashr i64 %x0, %y
102  %sh2 = ashr i64 %x1, %y
103  %logic = or i64 %z, %sh1
104  %r = or i64 %logic, %sh2
105  ret i64 %r
106}
107
; <4 x i32> vector: `or` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(sh1, z); r = or(sh2, logic).
; The expected asm is vpor, one vpsravd, vpor.
108define <4 x i32> @or_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
109; CHECK-LABEL: or_ashr_commute2:
110; CHECK:       # %bb.0:
111; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
112; CHECK-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
113; CHECK-NEXT:    vpor %xmm3, %xmm0, %xmm0
114; CHECK-NEXT:    retq
115  %sh1 = ashr <4 x i32> %x0, %y
116  %sh2 = ashr <4 x i32> %x1, %y
117  %logic = or <4 x i32> %sh1, %z
118  %r = or <4 x i32> %sh2, %logic
119  ret <4 x i32> %r
120}
121
; <16 x i8> vector: `or` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(z, sh1); r = or(sh2, logic).
; The expected asm ORs the inputs once (vpor %xmm1, %xmm0) and then shows one
; byte-shift expansion: the high and low halves are unpacked to words and run
; through vpsraw $4/$2/$1 + vpblendvb steps before being re-packed.
122define <16 x i8> @or_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
123; CHECK-LABEL: or_ashr_commute3:
124; CHECK:       # %bb.0:
125; CHECK-NEXT:    vpsllw $5, %xmm2, %xmm2
126; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
127; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
128; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
129; CHECK-NEXT:    vpsraw $4, %xmm1, %xmm5
130; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
131; CHECK-NEXT:    vpsraw $2, %xmm1, %xmm5
132; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm4
133; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
134; CHECK-NEXT:    vpsraw $1, %xmm1, %xmm5
135; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm4
136; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
137; CHECK-NEXT:    vpsrlw $8, %xmm1, %xmm1
138; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
139; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
140; CHECK-NEXT:    vpsraw $4, %xmm0, %xmm4
141; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
142; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm4
143; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
144; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
145; CHECK-NEXT:    vpsraw $1, %xmm0, %xmm4
146; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
147; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
148; CHECK-NEXT:    vpsrlw $8, %xmm0, %xmm0
149; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
150; CHECK-NEXT:    vpor %xmm3, %xmm0, %xmm0
151; CHECK-NEXT:    retq
152  %sh1 = ashr <16 x i8> %x0, %y
153  %sh2 = ashr <16 x i8> %x1, %y
154  %logic = or <16 x i8> %z, %sh1
155  %r = or <16 x i8> %sh2, %logic
156  ret <16 x i8> %r
157}
158
; i32 scalar: `or` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(sh1, z); r = or(logic, sh2).
; The expected asm contains a single shll applied to (%x0 | %x1).
159define i32 @or_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
160; CHECK-LABEL: or_shl_commute0:
161; CHECK:       # %bb.0:
162; CHECK-NEXT:    movl %ecx, %eax
163; CHECK-NEXT:    movl %edx, %ecx
164; CHECK-NEXT:    orl %esi, %edi
165; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
166; CHECK-NEXT:    shll %cl, %edi
167; CHECK-NEXT:    orl %edi, %eax
168; CHECK-NEXT:    retq
169  %sh1 = shl i32 %x0, %y
170  %sh2 = shl i32 %x1, %y
171  %logic = or i32 %sh1, %z
172  %r = or i32 %logic, %sh2
173  ret i32 %r
174}
175
; i8 scalar: `or` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(z, sh1); r = or(logic, sh2).
; The expected asm contains a single shlb applied to (%x0 | %x1).
176define i8 @or_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
177; CHECK-LABEL: or_shl_commute1:
178; CHECK:       # %bb.0:
179; CHECK-NEXT:    movl %ecx, %eax
180; CHECK-NEXT:    movl %edx, %ecx
181; CHECK-NEXT:    orl %esi, %edi
182; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
183; CHECK-NEXT:    shlb %cl, %dil
184; CHECK-NEXT:    orb %dil, %al
185; CHECK-NEXT:    # kill: def $al killed $al killed $eax
186; CHECK-NEXT:    retq
187  %sh1 = shl i8 %x0, %y
188  %sh2 = shl i8 %x1, %y
189  %logic = or i8 %z, %sh1
190  %r = or i8 %logic, %sh2
191  ret i8 %r
192}
193
; <2 x i64> vector: `or` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(sh1, z); r = or(sh2, logic).
; The expected asm is vpor, one vpsllvq, vpor.
194define <2 x i64> @or_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
195; CHECK-LABEL: or_shl_commute2:
196; CHECK:       # %bb.0:
197; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
198; CHECK-NEXT:    vpsllvq %xmm2, %xmm0, %xmm0
199; CHECK-NEXT:    vpor %xmm3, %xmm0, %xmm0
200; CHECK-NEXT:    retq
201  %sh1 = shl <2 x i64> %x0, %y
202  %sh2 = shl <2 x i64> %x1, %y
203  %logic = or <2 x i64> %sh1, %z
204  %r = or <2 x i64> %sh2, %logic
205  ret <2 x i64> %r
206}
207
; <8 x i16> vector: `or` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = or(z, sh1); r = or(sh2, logic).
; The expected asm ORs the inputs first; the one remaining shift is done on
; zero-extended 32-bit lanes (vpmovzxwd + vpsllvd) and narrowed back with
; vpshufb + vpermq.
208define <8 x i16> @or_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
209; CHECK-LABEL: or_shl_commute3:
210; CHECK:       # %bb.0:
211; CHECK-NEXT:    vpor %xmm1, %xmm0, %xmm0
212; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
213; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
214; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
215; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
216; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
217; CHECK-NEXT:    vpor %xmm3, %xmm0, %xmm0
218; CHECK-NEXT:    vzeroupper
219; CHECK-NEXT:    retq
220  %sh1 = shl <8 x i16> %x0, %y
221  %sh2 = shl <8 x i16> %x1, %y
222  %logic = or <8 x i16> %z, %sh1
223  %r = or <8 x i16> %sh2, %logic
224  ret <8 x i16> %r
225}
226
227; negative test - mismatched shift opcodes
228
; %sh1 is an `ashr` and %sh2 is an `lshr`, both by %y, OR-ed together with %z.
; The expected asm keeps both shifts (sarq and shrq) and both orq instructions.
229define i64 @or_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
230; CHECK-LABEL: or_mix_shr:
231; CHECK:       # %bb.0:
232; CHECK-NEXT:    movq %rcx, %rax
233; CHECK-NEXT:    movq %rdx, %rcx
234; CHECK-NEXT:    sarq %cl, %rdi
235; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
236; CHECK-NEXT:    shrq %cl, %rsi
237; CHECK-NEXT:    orq %rdi, %rax
238; CHECK-NEXT:    orq %rsi, %rax
239; CHECK-NEXT:    retq
240  %sh1 = ashr i64 %x0, %y
241  %sh2 = lshr i64 %x1, %y
242  %logic = or i64 %sh1, %z
243  %r = or i64 %logic, %sh2
244  ret i64 %r
245}
246
247; negative test - mismatched shift amounts
248
; Both shifts are `lshr`, but %sh1 shifts by %y and %sh2 shifts by %w.
; The expected asm keeps two shrq instructions (reloading %cl from %r8d in
; between) and both orq instructions.
249define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
250; CHECK-LABEL: or_lshr_mix_shift_amount:
251; CHECK:       # %bb.0:
252; CHECK-NEXT:    movq %rcx, %rax
253; CHECK-NEXT:    movq %rdx, %rcx
254; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
255; CHECK-NEXT:    shrq %cl, %rdi
256; CHECK-NEXT:    movl %r8d, %ecx
257; CHECK-NEXT:    shrq %cl, %rsi
258; CHECK-NEXT:    orq %rdi, %rax
259; CHECK-NEXT:    orq %rsi, %rax
260; CHECK-NEXT:    retq
261  %sh1 = lshr i64 %x0, %y
262  %sh2 = lshr i64 %x1, %w
263  %logic = or i64 %sh1, %z
264  %r = or i64 %logic, %sh2
265  ret i64 %r
266}
267
268; negative test - mismatched logic opcodes
269
; Both shifts are `lshr` by %y, but the inner logic op is `xor` and the outer
; one is `or`. The expected asm keeps two shrq instructions followed by xorq
; and orq.
270define i64 @mix_logic_lshr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
271; CHECK-LABEL: mix_logic_lshr:
272; CHECK:       # %bb.0:
273; CHECK-NEXT:    movq %rcx, %rax
274; CHECK-NEXT:    movq %rdx, %rcx
275; CHECK-NEXT:    shrq %cl, %rdi
276; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
277; CHECK-NEXT:    shrq %cl, %rsi
278; CHECK-NEXT:    xorq %rdi, %rax
279; CHECK-NEXT:    orq %rsi, %rax
280; CHECK-NEXT:    retq
281  %sh1 = lshr i64 %x0, %y
282  %sh2 = lshr i64 %x1, %y
283  %logic = xor i64 %sh1, %z
284  %r = or i64 %logic, %sh2
285  ret i64 %r
286}
287
; i8 scalar: `xor` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(sh1, z); r = xor(logic, sh2).
; The expected asm XORs %x0 with %x1 first and then shifts only once (shrb).
288define i8 @xor_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
289; CHECK-LABEL: xor_lshr_commute0:
290; CHECK:       # %bb.0:
291; CHECK-NEXT:    movl %ecx, %eax
292; CHECK-NEXT:    movl %edx, %ecx
293; CHECK-NEXT:    xorl %esi, %edi
294; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
295; CHECK-NEXT:    shrb %cl, %dil
296; CHECK-NEXT:    xorb %dil, %al
297; CHECK-NEXT:    # kill: def $al killed $al killed $eax
298; CHECK-NEXT:    retq
299  %sh1 = lshr i8 %x0, %y
300  %sh2 = lshr i8 %x1, %y
301  %logic = xor i8 %sh1, %z
302  %r = xor i8 %logic, %sh2
303  ret i8 %r
304}
305
; i32 scalar: `xor` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(z, sh1); r = xor(logic, sh2).
; The expected asm contains a single shrl applied to (%x0 ^ %x1).
306define i32 @xor_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
307; CHECK-LABEL: xor_lshr_commute1:
308; CHECK:       # %bb.0:
309; CHECK-NEXT:    movl %ecx, %eax
310; CHECK-NEXT:    movl %edx, %ecx
311; CHECK-NEXT:    xorl %esi, %edi
312; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
313; CHECK-NEXT:    shrl %cl, %edi
314; CHECK-NEXT:    xorl %edi, %eax
315; CHECK-NEXT:    retq
316  %sh1 = lshr i32 %x0, %y
317  %sh2 = lshr i32 %x1, %y
318  %logic = xor i32 %z, %sh1
319  %r = xor i32 %logic, %sh2
320  ret i32 %r
321}
322
; <8 x i16> vector: `xor` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(sh1, z); r = xor(sh2, logic).
; The expected asm XORs the inputs first; the one remaining shift is done on
; zero-extended 32-bit lanes (vpmovzxwd + vpsrlvd) and packed back with vpackusdw.
323define <8 x i16> @xor_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
324; CHECK-LABEL: xor_lshr_commute2:
325; CHECK:       # %bb.0:
326; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
327; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
328; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
329; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
330; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
331; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
332; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
333; CHECK-NEXT:    vzeroupper
334; CHECK-NEXT:    retq
335  %sh1 = lshr <8 x i16> %x0, %y
336  %sh2 = lshr <8 x i16> %x1, %y
337  %logic = xor <8 x i16> %sh1, %z
338  %r = xor <8 x i16> %sh2, %logic
339  ret <8 x i16> %r
340}
341
; <2 x i64> vector: `xor` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(z, sh1); r = xor(sh2, logic).
; The expected asm is vpxor, one vpsrlvq, vpxor.
342define <2 x i64> @xor_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
343; CHECK-LABEL: xor_lshr_commute3:
344; CHECK:       # %bb.0:
345; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
346; CHECK-NEXT:    vpsrlvq %xmm2, %xmm0, %xmm0
347; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
348; CHECK-NEXT:    retq
349  %sh1 = lshr <2 x i64> %x0, %y
350  %sh2 = lshr <2 x i64> %x1, %y
351  %logic = xor <2 x i64> %z, %sh1
352  %r = xor <2 x i64> %sh2, %logic
353  ret <2 x i64> %r
354}
355
; i16 scalar: `xor` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(sh1, z); r = xor(logic, sh2).
; The expected asm XORs the inputs, sign-extends the 16-bit result (movswl) and
; performs one 32-bit sarl.
356define i16 @xor_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
357; CHECK-LABEL: xor_ashr_commute0:
358; CHECK:       # %bb.0:
359; CHECK-NEXT:    movl %ecx, %r8d
360; CHECK-NEXT:    movl %edx, %ecx
361; CHECK-NEXT:    xorl %esi, %edi
362; CHECK-NEXT:    movswl %di, %eax
363; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
364; CHECK-NEXT:    sarl %cl, %eax
365; CHECK-NEXT:    xorl %r8d, %eax
366; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
367; CHECK-NEXT:    retq
368  %sh1 = ashr i16 %x0, %y
369  %sh2 = ashr i16 %x1, %y
370  %logic = xor i16 %sh1, %z
371  %r = xor i16 %logic, %sh2
372  ret i16 %r
373}
374
; i64 scalar: `xor` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(z, sh1); r = xor(logic, sh2).
; The expected asm contains a single sarq applied to (%x0 ^ %x1).
375define i64 @xor_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
376; CHECK-LABEL: xor_ashr_commute1:
377; CHECK:       # %bb.0:
378; CHECK-NEXT:    movq %rcx, %rax
379; CHECK-NEXT:    movq %rdx, %rcx
380; CHECK-NEXT:    xorq %rsi, %rdi
381; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
382; CHECK-NEXT:    sarq %cl, %rdi
383; CHECK-NEXT:    xorq %rdi, %rax
384; CHECK-NEXT:    retq
385  %sh1 = ashr i64 %x0, %y
386  %sh2 = ashr i64 %x1, %y
387  %logic = xor i64 %z, %sh1
388  %r = xor i64 %logic, %sh2
389  ret i64 %r
390}
391
; <4 x i32> vector: `xor` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(sh1, z); r = xor(sh2, logic).
; The expected asm is vpxor, one vpsravd, vpxor.
392define <4 x i32> @xor_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
393; CHECK-LABEL: xor_ashr_commute2:
394; CHECK:       # %bb.0:
395; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
396; CHECK-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
397; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
398; CHECK-NEXT:    retq
399  %sh1 = ashr <4 x i32> %x0, %y
400  %sh2 = ashr <4 x i32> %x1, %y
401  %logic = xor <4 x i32> %sh1, %z
402  %r = xor <4 x i32> %sh2, %logic
403  ret <4 x i32> %r
404}
405
; <16 x i8> vector: `xor` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(z, sh1); r = xor(sh2, logic).
; The expected asm XORs the inputs once (vpxor %xmm1, %xmm0) and then shows one
; byte-shift expansion: the high and low halves are unpacked to words and run
; through vpsraw $4/$2/$1 + vpblendvb steps before being re-packed.
406define <16 x i8> @xor_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
407; CHECK-LABEL: xor_ashr_commute3:
408; CHECK:       # %bb.0:
409; CHECK-NEXT:    vpsllw $5, %xmm2, %xmm2
410; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
411; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
412; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
413; CHECK-NEXT:    vpsraw $4, %xmm1, %xmm5
414; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
415; CHECK-NEXT:    vpsraw $2, %xmm1, %xmm5
416; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm4
417; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
418; CHECK-NEXT:    vpsraw $1, %xmm1, %xmm5
419; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm4
420; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
421; CHECK-NEXT:    vpsrlw $8, %xmm1, %xmm1
422; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
423; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
424; CHECK-NEXT:    vpsraw $4, %xmm0, %xmm4
425; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
426; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm4
427; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
428; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
429; CHECK-NEXT:    vpsraw $1, %xmm0, %xmm4
430; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
431; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
432; CHECK-NEXT:    vpsrlw $8, %xmm0, %xmm0
433; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
434; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
435; CHECK-NEXT:    retq
436  %sh1 = ashr <16 x i8> %x0, %y
437  %sh2 = ashr <16 x i8> %x1, %y
438  %logic = xor <16 x i8> %z, %sh1
439  %r = xor <16 x i8> %sh2, %logic
440  ret <16 x i8> %r
441}
442
; i32 scalar: `xor` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(sh1, z); r = xor(logic, sh2).
; The expected asm contains a single shll applied to (%x0 ^ %x1).
443define i32 @xor_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
444; CHECK-LABEL: xor_shl_commute0:
445; CHECK:       # %bb.0:
446; CHECK-NEXT:    movl %ecx, %eax
447; CHECK-NEXT:    movl %edx, %ecx
448; CHECK-NEXT:    xorl %esi, %edi
449; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
450; CHECK-NEXT:    shll %cl, %edi
451; CHECK-NEXT:    xorl %edi, %eax
452; CHECK-NEXT:    retq
453  %sh1 = shl i32 %x0, %y
454  %sh2 = shl i32 %x1, %y
455  %logic = xor i32 %sh1, %z
456  %r = xor i32 %logic, %sh2
457  ret i32 %r
458}
459
; i8 scalar: `xor` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(z, sh1); r = xor(logic, sh2).
; The expected asm contains a single shlb applied to (%x0 ^ %x1).
460define i8 @xor_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
461; CHECK-LABEL: xor_shl_commute1:
462; CHECK:       # %bb.0:
463; CHECK-NEXT:    movl %ecx, %eax
464; CHECK-NEXT:    movl %edx, %ecx
465; CHECK-NEXT:    xorl %esi, %edi
466; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
467; CHECK-NEXT:    shlb %cl, %dil
468; CHECK-NEXT:    xorb %dil, %al
469; CHECK-NEXT:    # kill: def $al killed $al killed $eax
470; CHECK-NEXT:    retq
471  %sh1 = shl i8 %x0, %y
472  %sh2 = shl i8 %x1, %y
473  %logic = xor i8 %z, %sh1
474  %r = xor i8 %logic, %sh2
475  ret i8 %r
476}
477
; <2 x i64> vector: `xor` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(sh1, z); r = xor(sh2, logic).
; The expected asm is vpxor, one vpsllvq, vpxor.
478define <2 x i64> @xor_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
479; CHECK-LABEL: xor_shl_commute2:
480; CHECK:       # %bb.0:
481; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
482; CHECK-NEXT:    vpsllvq %xmm2, %xmm0, %xmm0
483; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
484; CHECK-NEXT:    retq
485  %sh1 = shl <2 x i64> %x0, %y
486  %sh2 = shl <2 x i64> %x1, %y
487  %logic = xor <2 x i64> %sh1, %z
488  %r = xor <2 x i64> %sh2, %logic
489  ret <2 x i64> %r
490}
491
; <8 x i16> vector: `xor` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = xor(z, sh1); r = xor(sh2, logic).
; The expected asm XORs the inputs first; the one remaining shift is done on
; zero-extended 32-bit lanes (vpmovzxwd + vpsllvd) and narrowed back with
; vpshufb + vpermq.
492define <8 x i16> @xor_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
493; CHECK-LABEL: xor_shl_commute3:
494; CHECK:       # %bb.0:
495; CHECK-NEXT:    vpxor %xmm1, %xmm0, %xmm0
496; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
497; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
498; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
499; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
500; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
501; CHECK-NEXT:    vpxor %xmm3, %xmm0, %xmm0
502; CHECK-NEXT:    vzeroupper
503; CHECK-NEXT:    retq
504  %sh1 = shl <8 x i16> %x0, %y
505  %sh2 = shl <8 x i16> %x1, %y
506  %logic = xor <8 x i16> %z, %sh1
507  %r = xor <8 x i16> %sh2, %logic
508  ret <8 x i16> %r
509}
510
511; negative test - mismatched shift opcodes
512
; %sh1 is an `ashr` and %sh2 is an `lshr`, both by %y, XOR-ed together with %z.
; The expected asm keeps both shifts (sarq and shrq) and both xorq instructions.
513define i64 @xor_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
514; CHECK-LABEL: xor_mix_shr:
515; CHECK:       # %bb.0:
516; CHECK-NEXT:    movq %rcx, %rax
517; CHECK-NEXT:    movq %rdx, %rcx
518; CHECK-NEXT:    sarq %cl, %rdi
519; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
520; CHECK-NEXT:    shrq %cl, %rsi
521; CHECK-NEXT:    xorq %rdi, %rax
522; CHECK-NEXT:    xorq %rsi, %rax
523; CHECK-NEXT:    retq
524  %sh1 = ashr i64 %x0, %y
525  %sh2 = lshr i64 %x1, %y
526  %logic = xor i64 %sh1, %z
527  %r = xor i64 %logic, %sh2
528  ret i64 %r
529}
530
531; negative test - mismatched shift amounts
532
; Both shifts are `lshr`, but %sh1 shifts by %y and %sh2 shifts by %w.
; The expected asm keeps two shrq instructions (reloading %cl from %r8d in
; between) and both xorq instructions.
533define i64 @xor_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
534; CHECK-LABEL: xor_lshr_mix_shift_amount:
535; CHECK:       # %bb.0:
536; CHECK-NEXT:    movq %rcx, %rax
537; CHECK-NEXT:    movq %rdx, %rcx
538; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
539; CHECK-NEXT:    shrq %cl, %rdi
540; CHECK-NEXT:    movl %r8d, %ecx
541; CHECK-NEXT:    shrq %cl, %rsi
542; CHECK-NEXT:    xorq %rdi, %rax
543; CHECK-NEXT:    xorq %rsi, %rax
544; CHECK-NEXT:    retq
545  %sh1 = lshr i64 %x0, %y
546  %sh2 = lshr i64 %x1, %w
547  %logic = xor i64 %sh1, %z
548  %r = xor i64 %logic, %sh2
549  ret i64 %r
550}
551
552; negative test - mismatched logic opcodes
553
; Both shifts are `ashr` by %y, but the inner logic op is `or` and the outer
; one is `xor`. The expected asm keeps two sarq instructions followed by orq
; and xorq.
554define i64 @mix_logic_ashr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
555; CHECK-LABEL: mix_logic_ashr:
556; CHECK:       # %bb.0:
557; CHECK-NEXT:    movq %rcx, %rax
558; CHECK-NEXT:    movq %rdx, %rcx
559; CHECK-NEXT:    sarq %cl, %rdi
560; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
561; CHECK-NEXT:    sarq %cl, %rsi
562; CHECK-NEXT:    orq %rdi, %rax
563; CHECK-NEXT:    xorq %rsi, %rax
564; CHECK-NEXT:    retq
565  %sh1 = ashr i64 %x0, %y
566  %sh2 = ashr i64 %x1, %y
567  %logic = or i64 %sh1, %z
568  %r = xor i64 %logic, %sh2
569  ret i64 %r
570}
571
; i8 scalar: `and` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(sh1, z); r = and(logic, sh2).
; The expected asm ANDs %x0 with %x1 first and then shifts only once (shrb).
572define i8 @and_lshr_commute0(i8 %x0, i8 %x1, i8 %y, i8 %z) {
573; CHECK-LABEL: and_lshr_commute0:
574; CHECK:       # %bb.0:
575; CHECK-NEXT:    movl %ecx, %eax
576; CHECK-NEXT:    movl %edx, %ecx
577; CHECK-NEXT:    andl %esi, %edi
578; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
579; CHECK-NEXT:    shrb %cl, %dil
580; CHECK-NEXT:    andb %dil, %al
581; CHECK-NEXT:    # kill: def $al killed $al killed $eax
582; CHECK-NEXT:    retq
583  %sh1 = lshr i8 %x0, %y
584  %sh2 = lshr i8 %x1, %y
585  %logic = and i8 %sh1, %z
586  %r = and i8 %logic, %sh2
587  ret i8 %r
588}
589
; i32 scalar: `and` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(z, sh1); r = and(logic, sh2).
; The expected asm contains a single shrl applied to (%x0 & %x1).
590define i32 @and_lshr_commute1(i32 %x0, i32 %x1, i32 %y, i32 %z) {
591; CHECK-LABEL: and_lshr_commute1:
592; CHECK:       # %bb.0:
593; CHECK-NEXT:    movl %ecx, %eax
594; CHECK-NEXT:    movl %edx, %ecx
595; CHECK-NEXT:    andl %esi, %edi
596; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
597; CHECK-NEXT:    shrl %cl, %edi
598; CHECK-NEXT:    andl %edi, %eax
599; CHECK-NEXT:    retq
600  %sh1 = lshr i32 %x0, %y
601  %sh2 = lshr i32 %x1, %y
602  %logic = and i32 %z, %sh1
603  %r = and i32 %logic, %sh2
604  ret i32 %r
605}
606
; <8 x i16> vector: `and` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(sh1, z); r = and(sh2, logic).
; The expected asm ANDs the inputs first; the one remaining shift is done on
; zero-extended 32-bit lanes (vpmovzxwd + vpsrlvd) and packed back with vpackusdw.
607define <8 x i16> @and_lshr_commute2(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
608; CHECK-LABEL: and_lshr_commute2:
609; CHECK:       # %bb.0:
610; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
611; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
612; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
613; CHECK-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
614; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm1
615; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
616; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
617; CHECK-NEXT:    vzeroupper
618; CHECK-NEXT:    retq
619  %sh1 = lshr <8 x i16> %x0, %y
620  %sh2 = lshr <8 x i16> %x1, %y
621  %logic = and <8 x i16> %sh1, %z
622  %r = and <8 x i16> %sh2, %logic
623  ret <8 x i16> %r
624}
625
; <2 x i64> vector: `and` of two `lshr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(z, sh1); r = and(sh2, logic).
; The expected asm is vpand, one vpsrlvq, vpand.
626define <2 x i64> @and_lshr_commute3(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
627; CHECK-LABEL: and_lshr_commute3:
628; CHECK:       # %bb.0:
629; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
630; CHECK-NEXT:    vpsrlvq %xmm2, %xmm0, %xmm0
631; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
632; CHECK-NEXT:    retq
633  %sh1 = lshr <2 x i64> %x0, %y
634  %sh2 = lshr <2 x i64> %x1, %y
635  %logic = and <2 x i64> %z, %sh1
636  %r = and <2 x i64> %sh2, %logic
637  ret <2 x i64> %r
638}
639
; i16 scalar: `and` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(sh1, z); r = and(logic, sh2).
; The expected asm ANDs the inputs, sign-extends the 16-bit result (movswl) and
; performs one 32-bit sarl.
640define i16 @and_ashr_commute0(i16 %x0, i16 %x1, i16 %y, i16 %z) {
641; CHECK-LABEL: and_ashr_commute0:
642; CHECK:       # %bb.0:
643; CHECK-NEXT:    movl %ecx, %r8d
644; CHECK-NEXT:    movl %edx, %ecx
645; CHECK-NEXT:    andl %esi, %edi
646; CHECK-NEXT:    movswl %di, %eax
647; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
648; CHECK-NEXT:    sarl %cl, %eax
649; CHECK-NEXT:    andl %r8d, %eax
650; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
651; CHECK-NEXT:    retq
652  %sh1 = ashr i16 %x0, %y
653  %sh2 = ashr i16 %x1, %y
654  %logic = and i16 %sh1, %z
655  %r = and i16 %logic, %sh2
656  ret i16 %r
657}
658
; i64 scalar: `and` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(z, sh1); r = and(logic, sh2).
; The expected asm contains a single sarq applied to (%x0 & %x1).
659define i64 @and_ashr_commute1(i64 %x0, i64 %x1, i64 %y, i64 %z) {
660; CHECK-LABEL: and_ashr_commute1:
661; CHECK:       # %bb.0:
662; CHECK-NEXT:    movq %rcx, %rax
663; CHECK-NEXT:    movq %rdx, %rcx
664; CHECK-NEXT:    andq %rsi, %rdi
665; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
666; CHECK-NEXT:    sarq %cl, %rdi
667; CHECK-NEXT:    andq %rdi, %rax
668; CHECK-NEXT:    retq
669  %sh1 = ashr i64 %x0, %y
670  %sh2 = ashr i64 %x1, %y
671  %logic = and i64 %z, %sh1
672  %r = and i64 %logic, %sh2
673  ret i64 %r
674}
675
; <4 x i32> vector: `and` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(sh1, z); r = and(sh2, logic).
; The expected asm is vpand, one vpsravd, vpand.
676define <4 x i32> @and_ashr_commute2(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %y, <4 x i32> %z) {
677; CHECK-LABEL: and_ashr_commute2:
678; CHECK:       # %bb.0:
679; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
680; CHECK-NEXT:    vpsravd %xmm2, %xmm0, %xmm0
681; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
682; CHECK-NEXT:    retq
683  %sh1 = ashr <4 x i32> %x0, %y
684  %sh2 = ashr <4 x i32> %x1, %y
685  %logic = and <4 x i32> %sh1, %z
686  %r = and <4 x i32> %sh2, %logic
687  ret <4 x i32> %r
688}
689
; <16 x i8> vector: `and` of two `ashr` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(z, sh1); r = and(sh2, logic).
; The expected asm ANDs the inputs once (vpand %xmm1, %xmm0) and then shows one
; byte-shift expansion: the high and low halves are unpacked to words and run
; through vpsraw $4/$2/$1 + vpblendvb steps before being re-packed.
690define <16 x i8> @and_ashr_commute3(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %y, <16 x i8> %z) {
691; CHECK-LABEL: and_ashr_commute3:
692; CHECK:       # %bb.0:
693; CHECK-NEXT:    vpsllw $5, %xmm2, %xmm2
694; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
695; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
696; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
697; CHECK-NEXT:    vpsraw $4, %xmm1, %xmm5
698; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
699; CHECK-NEXT:    vpsraw $2, %xmm1, %xmm5
700; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm4
701; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
702; CHECK-NEXT:    vpsraw $1, %xmm1, %xmm5
703; CHECK-NEXT:    vpaddw %xmm4, %xmm4, %xmm4
704; CHECK-NEXT:    vpblendvb %xmm4, %xmm5, %xmm1, %xmm1
705; CHECK-NEXT:    vpsrlw $8, %xmm1, %xmm1
706; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
707; CHECK-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
708; CHECK-NEXT:    vpsraw $4, %xmm0, %xmm4
709; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
710; CHECK-NEXT:    vpsraw $2, %xmm0, %xmm4
711; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
712; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
713; CHECK-NEXT:    vpsraw $1, %xmm0, %xmm4
714; CHECK-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
715; CHECK-NEXT:    vpblendvb %xmm2, %xmm4, %xmm0, %xmm0
716; CHECK-NEXT:    vpsrlw $8, %xmm0, %xmm0
717; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
718; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
719; CHECK-NEXT:    retq
720  %sh1 = ashr <16 x i8> %x0, %y
721  %sh2 = ashr <16 x i8> %x1, %y
722  %logic = and <16 x i8> %z, %sh1
723  %r = and <16 x i8> %sh2, %logic
724  ret <16 x i8> %r
725}
726
; i32 scalar: `and` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(sh1, z); r = and(logic, sh2).
; The expected asm contains a single shll applied to (%x0 & %x1).
727define i32 @and_shl_commute0(i32 %x0, i32 %x1, i32 %y, i32 %z) {
728; CHECK-LABEL: and_shl_commute0:
729; CHECK:       # %bb.0:
730; CHECK-NEXT:    movl %ecx, %eax
731; CHECK-NEXT:    movl %edx, %ecx
732; CHECK-NEXT:    andl %esi, %edi
733; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
734; CHECK-NEXT:    shll %cl, %edi
735; CHECK-NEXT:    andl %edi, %eax
736; CHECK-NEXT:    retq
737  %sh1 = shl i32 %x0, %y
738  %sh2 = shl i32 %x1, %y
739  %logic = and i32 %sh1, %z
740  %r = and i32 %logic, %sh2
741  ret i32 %r
742}
743
; i8 scalar: `and` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(z, sh1); r = and(logic, sh2).
; The expected asm contains a single shlb applied to (%x0 & %x1).
744define i8 @and_shl_commute1(i8 %x0, i8 %x1, i8 %y, i8 %z) {
745; CHECK-LABEL: and_shl_commute1:
746; CHECK:       # %bb.0:
747; CHECK-NEXT:    movl %ecx, %eax
748; CHECK-NEXT:    movl %edx, %ecx
749; CHECK-NEXT:    andl %esi, %edi
750; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
751; CHECK-NEXT:    shlb %cl, %dil
752; CHECK-NEXT:    andb %dil, %al
753; CHECK-NEXT:    # kill: def $al killed $al killed $eax
754; CHECK-NEXT:    retq
755  %sh1 = shl i8 %x0, %y
756  %sh2 = shl i8 %x1, %y
757  %logic = and i8 %z, %sh1
758  %r = and i8 %logic, %sh2
759  ret i8 %r
760}
761
; <2 x i64> vector: `and` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(sh1, z); r = and(sh2, logic).
; The expected asm is vpand, one vpsllvq, vpand.
762define <2 x i64> @and_shl_commute2(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %y, <2 x i64> %z) {
763; CHECK-LABEL: and_shl_commute2:
764; CHECK:       # %bb.0:
765; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
766; CHECK-NEXT:    vpsllvq %xmm2, %xmm0, %xmm0
767; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
768; CHECK-NEXT:    retq
769  %sh1 = shl <2 x i64> %x0, %y
770  %sh2 = shl <2 x i64> %x1, %y
771  %logic = and <2 x i64> %sh1, %z
772  %r = and <2 x i64> %sh2, %logic
773  ret <2 x i64> %r
774}
775
; <8 x i16> vector: `and` of two `shl` results that share the shift amount %y, plus %z.
; Operand order under test: logic = and(z, sh1); r = and(sh2, logic).
; The expected asm ANDs the inputs first; the one remaining shift is done on
; zero-extended 32-bit lanes (vpmovzxwd + vpsllvd) and narrowed back with
; vpshufb + vpermq.
776define <8 x i16> @and_shl_commute3(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %y, <8 x i16> %z) {
777; CHECK-LABEL: and_shl_commute3:
778; CHECK:       # %bb.0:
779; CHECK-NEXT:    vpand %xmm1, %xmm0, %xmm0
780; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
781; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
782; CHECK-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
783; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
784; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
785; CHECK-NEXT:    vpand %xmm3, %xmm0, %xmm0
786; CHECK-NEXT:    vzeroupper
787; CHECK-NEXT:    retq
788  %sh1 = shl <8 x i16> %x0, %y
789  %sh2 = shl <8 x i16> %x1, %y
790  %logic = and <8 x i16> %z, %sh1
791  %r = and <8 x i16> %sh2, %logic
792  ret <8 x i16> %r
793}
794
795; negative test - mismatched shift opcodes
796
; %sh1 is an `lshr` and %sh2 is an `ashr`, both by %y, AND-ed together with %z.
; The expected asm keeps both shifts (shrq and sarq) and both andq instructions.
797define i64 @and_mix_shr(i64 %x0, i64 %x1, i64 %y, i64 %z) {
798; CHECK-LABEL: and_mix_shr:
799; CHECK:       # %bb.0:
800; CHECK-NEXT:    movq %rcx, %rax
801; CHECK-NEXT:    movq %rdx, %rcx
802; CHECK-NEXT:    shrq %cl, %rdi
803; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
804; CHECK-NEXT:    sarq %cl, %rsi
805; CHECK-NEXT:    andq %rdi, %rax
806; CHECK-NEXT:    andq %rsi, %rax
807; CHECK-NEXT:    retq
808  %sh1 = lshr i64 %x0, %y
809  %sh2 = ashr i64 %x1, %y
810  %logic = and i64 %sh1, %z
811  %r = and i64 %logic, %sh2
812  ret i64 %r
813}
814
815; negative test - mismatched shift amounts
816
; Both shifts are `lshr`, but %sh1 shifts by %y and %sh2 shifts by %w.
; The expected asm keeps two shrq instructions (reloading %cl from %r8d in
; between) and both andq instructions.
817define i64 @and_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) {
818; CHECK-LABEL: and_lshr_mix_shift_amount:
819; CHECK:       # %bb.0:
820; CHECK-NEXT:    movq %rcx, %rax
821; CHECK-NEXT:    movq %rdx, %rcx
822; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
823; CHECK-NEXT:    shrq %cl, %rdi
824; CHECK-NEXT:    movl %r8d, %ecx
825; CHECK-NEXT:    shrq %cl, %rsi
826; CHECK-NEXT:    andq %rdi, %rax
827; CHECK-NEXT:    andq %rsi, %rax
828; CHECK-NEXT:    retq
829  %sh1 = lshr i64 %x0, %y
830  %sh2 = lshr i64 %x1, %w
831  %logic = and i64 %sh1, %z
832  %r = and i64 %logic, %sh2
833  ret i64 %r
834}
835
836; negative test - mismatched logic opcodes
837
; Both shifts are `shl` by %y, but the inner logic op is `xor` and the outer
; one is `and`. The expected asm keeps two shlq instructions followed by xorq
; and andq.
838define i64 @mix_logic_shl(i64 %x0, i64 %x1, i64 %y, i64 %z) {
839; CHECK-LABEL: mix_logic_shl:
840; CHECK:       # %bb.0:
841; CHECK-NEXT:    movq %rcx, %rax
842; CHECK-NEXT:    movq %rdx, %rcx
843; CHECK-NEXT:    shlq %cl, %rdi
844; CHECK-NEXT:    # kill: def $cl killed $cl killed $rcx
845; CHECK-NEXT:    shlq %cl, %rsi
846; CHECK-NEXT:    xorq %rdi, %rax
847; CHECK-NEXT:    andq %rsi, %rax
848; CHECK-NEXT:    retq
849  %sh1 = shl i64 %x0, %y
850  %sh2 = shl i64 %x1, %y
851  %logic = xor i64 %sh1, %z
852  %r = and i64 %logic, %sh2
853  ret i64 %r
854}
855
856; (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1), where C1 + C2 == bitwidth
857
; i32: (shl (x | y), 5) | (lshr x, 27); the two shift constants sum to 32.
; Operand order under test: or1 = or(x, y); r = or(sh1, sh2).
; The expected asm is one orl followed by a single shldl $5.
858define i32 @or_fshl_commute0(i32 %x, i32 %y) {
859; CHECK-LABEL: or_fshl_commute0:
860; CHECK:       # %bb.0:
861; CHECK-NEXT:    movl %esi, %eax
862; CHECK-NEXT:    orl %edi, %eax
863; CHECK-NEXT:    shldl $5, %edi, %eax
864; CHECK-NEXT:    retq
865  %or1 = or i32 %x, %y
866  %sh1 = shl i32 %or1, 5
867  %sh2 = lshr i32 %x, 27
868  %r = or i32 %sh1, %sh2
869  ret i32 %r
870}
871
; i64: or (shl (or %y, %x), 35), (lshr %x, 29)
; Same shape as or_fshl_commute0 with the inner 'or' operands swapped; 35 + 29 = 64.
; The expected asm uses a 32-bit orl before shldq $35.
; NOTE(review): presumably the narrow 'or' is enough because the shl by 35
; discards all but the low 29 bits of the 'or' result - confirm.
872define i64 @or_fshl_commute1(i64 %x, i64 %y) {
873; CHECK-LABEL: or_fshl_commute1:
874; CHECK:       # %bb.0:
875; CHECK-NEXT:    movl %edi, %eax
876; CHECK-NEXT:    orl %esi, %eax
877; CHECK-NEXT:    shldq $35, %rdi, %rax
878; CHECK-NEXT:    retq
879  %or1 = or i64 %y, %x
880  %sh1 = shl i64 %or1, 35
881  %sh2 = lshr i64 %x, 29
882  %r = or i64 %sh1, %sh2
883  ret i64 %r
884}
885
; i16: or (lshr %x, 14), (shl (or %x, %y), 2)
; Variant with the final 'or' operands swapped; 2 + 14 = 16.
; The expected asm expresses the combined shift as shrdw $14, with %si
; holding the 'or' result and %ax holding %x.
886define i16 @or_fshl_commute2(i16 %x, i16 %y) {
887; CHECK-LABEL: or_fshl_commute2:
888; CHECK:       # %bb.0:
889; CHECK-NEXT:    movl %edi, %eax
890; CHECK-NEXT:    orl %edi, %esi
891; CHECK-NEXT:    shrdw $14, %si, %ax
892; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
893; CHECK-NEXT:    retq
894  %or1 = or i16 %x, %y
895  %sh1 = shl i16 %or1, 2
896  %sh2 = lshr i16 %x, 14
897  %r = or i16 %sh2, %sh1
898  ret i16 %r
899}
900
; i8: or (lshr %x, 3), (shl (or %y, %x), 5)
; Variant with both 'or' instructions' operands swapped; 5 + 3 = 8.
; The expected asm has no double-shift instruction for this i8 case:
; it keeps separate shlb and shrb joined by orb.
901define i8 @or_fshl_commute3(i8 %x, i8 %y) {
902; CHECK-LABEL: or_fshl_commute3:
903; CHECK:       # %bb.0:
904; CHECK-NEXT:    movl %edi, %eax
905; CHECK-NEXT:    orl %edi, %esi
906; CHECK-NEXT:    shlb $5, %sil
907; CHECK-NEXT:    shrb $3, %al
908; CHECK-NEXT:    orb %sil, %al
909; CHECK-NEXT:    # kill: def $al killed $al killed $eax
910; CHECK-NEXT:    retq
911  %or1 = or i8 %y, %x
912  %sh1 = shl i8 %or1, 5
913  %sh2 = lshr i8 %x, 3
914  %r = or i8 %sh2, %sh1
915  ret i8 %r
916}
917
; i32: or (shl (or %x, %y), 20), (lshr %x, 11)
; The shift amounts do not sum to the bit width (20 + 11 = 31, not 32).
; The expected asm keeps separate shll and shrl joined by orl.
918define i32 @or_fshl_wrong_shift(i32 %x, i32 %y) {
919; CHECK-LABEL: or_fshl_wrong_shift:
920; CHECK:       # %bb.0:
921; CHECK-NEXT:    movl %edi, %eax
922; CHECK-NEXT:    orl %edi, %esi
923; CHECK-NEXT:    shll $20, %esi
924; CHECK-NEXT:    shrl $11, %eax
925; CHECK-NEXT:    orl %esi, %eax
926; CHECK-NEXT:    retq
927  %or1 = or i32 %x, %y
928  %sh1 = shl i32 %or1, 20
929  %sh2 = lshr i32 %x, 11
930  %r = or i32 %sh1, %sh2
931  ret i32 %r
932}
933
934; (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
935
; i64: or (shl %x, 40), (lshr (or %x, %y), 24)
; The shift amounts sum to the bit width (40 + 24 = 64).
; The expected asm is one orq followed by a single shrdq $24.
936define i64 @or_fshr_commute0(i64 %x, i64 %y) {
937; CHECK-LABEL: or_fshr_commute0:
938; CHECK:       # %bb.0:
939; CHECK-NEXT:    movq %rsi, %rax
940; CHECK-NEXT:    orq %rdi, %rax
941; CHECK-NEXT:    shrdq $24, %rdi, %rax
942; CHECK-NEXT:    retq
943  %or1 = or i64 %x, %y
944  %sh1 = shl i64 %x, 40
945  %sh2 = lshr i64 %or1, 24
946  %r = or i64 %sh1, %sh2
947  ret i64 %r
948}
949
; i32: or (shl %x, 3), (lshr (or %y, %x), 29)
; Same shape as or_fshr_commute0 with the inner 'or' operands swapped; 3 + 29 = 32.
; The expected asm is one orl followed by a single shrdl $29.
950define i32 @or_fshr_commute1(i32 %x, i32 %y) {
951; CHECK-LABEL: or_fshr_commute1:
952; CHECK:       # %bb.0:
953; CHECK-NEXT:    movl %esi, %eax
954; CHECK-NEXT:    orl %edi, %eax
955; CHECK-NEXT:    shrdl $29, %edi, %eax
956; CHECK-NEXT:    retq
957  %or1 = or i32 %y, %x
958  %sh1 = shl i32 %x, 3
959  %sh2 = lshr i32 %or1, 29
960  %r = or i32 %sh1, %sh2
961  ret i32 %r
962}
963
; i16: or (lshr (or %x, %y), 7), (shl %x, 9)
; Variant with the final 'or' operands swapped; 9 + 7 = 16.
; The expected asm is one orl followed by a single shrdw $7.
964define i16 @or_fshr_commute2(i16 %x, i16 %y) {
965; CHECK-LABEL: or_fshr_commute2:
966; CHECK:       # %bb.0:
967; CHECK-NEXT:    movl %esi, %eax
968; CHECK-NEXT:    orl %edi, %eax
969; CHECK-NEXT:    shrdw $7, %di, %ax
970; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
971; CHECK-NEXT:    retq
972  %or1 = or i16 %x, %y
973  %sh1 = shl i16 %x, 9
974  %sh2 = lshr i16 %or1, 7
975  %r = or i16 %sh2, %sh1
976  ret i16 %r
977}
978
; i8: or (lshr (or %y, %x), 6), (shl %x, 2)
; Variant with both 'or' instructions' operands swapped; 2 + 6 = 8.
; The expected asm has no double-shift instruction for this i8 case:
; it uses shrb $6 for the lshr, leal (,%rdi,4) for the shl by 2, then orb.
979define i8 @or_fshr_commute3(i8 %x, i8 %y) {
980; CHECK-LABEL: or_fshr_commute3:
981; CHECK:       # %bb.0:
982; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
983; CHECK-NEXT:    orl %edi, %esi
984; CHECK-NEXT:    shrb $6, %sil
985; CHECK-NEXT:    leal (,%rdi,4), %eax
986; CHECK-NEXT:    orb %sil, %al
987; CHECK-NEXT:    # kill: def $al killed $al killed $eax
988; CHECK-NEXT:    retq
989  %or1 = or i8 %y, %x
990  %sh1 = shl i8 %x, 2
991  %sh2 = lshr i8 %or1, 6
992  %r = or i8 %sh2, %sh1
993  ret i8 %r
994}
995
; i32: or (shl %x, 7), (lshr (or %x, %y), 26)
; The shift amounts do not sum to the bit width (7 + 26 = 33, not 32).
; The expected asm keeps separate shll and shrl; the final 'or' is emitted
; as leal (%rsi,%rdi).
996define i32 @or_fshr_wrong_shift(i32 %x, i32 %y) {
997; CHECK-LABEL: or_fshr_wrong_shift:
998; CHECK:       # %bb.0:
999; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
1000; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
1001; CHECK-NEXT:    orl %edi, %esi
1002; CHECK-NEXT:    shll $7, %edi
1003; CHECK-NEXT:    shrl $26, %esi
1004; CHECK-NEXT:    leal (%rsi,%rdi), %eax
1005; CHECK-NEXT:    retq
1006  %or1 = or i32 %x, %y
1007  %sh1 = shl i32 %x, 7
1008  %sh2 = lshr i32 %or1, 26
1009  %r = or i32 %sh1, %sh2
1010  ret i32 %r
1011}
1012