; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64

; Check that we recognize this idiom for rotation too:
;    a << (b & (OpSize-1)) | a >> ((0 - b) & (OpSize-1))
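;
; For reference, a source-level sketch of this idiom (a hypothetical C helper,
; assuming unsigned 32-bit operands; not part of the test input) is:
;
;   unsigned rotl32(unsigned a, unsigned b) {
;     /* Both masks keep the shift amounts in [0, 31], so neither shift is
;        undefined and the expression is a rotate-left by (b & 31). */
;     return (a << (b & 31)) | (a >> ((0 - b) & 31));
;   }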

define i32 @rotate_left_32(i32 %a, i32 %b) {
; X86-LABEL: rotate_left_32:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %cl, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotate_left_32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    roll %cl, %eax
; X64-NEXT:    retq
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = lshr i32 %a, %and3
  %or = or i32 %shl, %shr
  ret i32 %or
}

define i32 @rotate_right_32(i32 %a, i32 %b) {
; X86-LABEL: rotate_right_32:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rorl %cl, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotate_right_32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rorl %cl, %eax
; X64-NEXT:    retq
  %and = and i32 %b, 31
  %shl = lshr i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = shl i32 %a, %and3
  %or = or i32 %shl, %shr
  ret i32 %or
}

define i64 @rotate_left_64(i64 %a, i64 %b) {
; X86-LABEL: rotate_left_64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    .cfi_offset %esi, -16
; X86-NEXT:    .cfi_offset %edi, -12
; X86-NEXT:    .cfi_offset %ebx, -8
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    shldl %cl, %esi, %edx
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB2_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:  .LBB2_2:
; X86-NEXT:    negb %cl
; X86-NEXT:    movl %edi, %ebx
; X86-NEXT:    shrl %cl, %ebx
; X86-NEXT:    shrdl %cl, %edi, %esi
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB2_4
; X86-NEXT:  # %bb.3:
; X86-NEXT:    movl %ebx, %esi
; X86-NEXT:    xorl %ebx, %ebx
; X86-NEXT:  .LBB2_4:
; X86-NEXT:    orl %ebx, %edx
; X86-NEXT:    orl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: rotate_left_64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rcx
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NEXT:    rolq %cl, %rax
; X64-NEXT:    retq
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = lshr i64 %a, %and3
  %or = or i64 %shl, %shr
  ret i64 %or
}

define i64 @rotate_right_64(i64 %a, i64 %b) {
; X86-LABEL: rotate_right_64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    .cfi_offset %esi, -16
; X86-NEXT:    .cfi_offset %edi, -12
; X86-NEXT:    .cfi_offset %ebx, -8
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    shrl %cl, %edx
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    shrdl %cl, %esi, %eax
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB3_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %edx, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:  .LBB3_2:
; X86-NEXT:    negb %cl
; X86-NEXT:    movl %edi, %ebx
; X86-NEXT:    shll %cl, %ebx
; X86-NEXT:    shldl %cl, %edi, %esi
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB3_4
; X86-NEXT:  # %bb.3:
; X86-NEXT:    movl %ebx, %esi
; X86-NEXT:    xorl %ebx, %ebx
; X86-NEXT:  .LBB3_4:
; X86-NEXT:    orl %esi, %edx
; X86-NEXT:    orl %ebx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: rotate_right_64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rcx
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NEXT:    rorq %cl, %rax
; X64-NEXT:    retq
  %and = and i64 %b, 63
  %shl = lshr i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = shl i64 %a, %and3
  %or = or i64 %shl, %shr
  ret i64 %or
}

; Also check the memory-operand forms.
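;
; A source-level sketch of the memory-operand case (a hypothetical C helper,
; assuming a 32-bit rotate through a pointer) is:
;
;   void rotl32_mem(unsigned *pa, unsigned b) {
;     unsigned a = *pa;
;     *pa = (a << (b & 31)) | (a >> ((0 - b) & 31));
;   }
;
; The expected lowering folds the load, rotate, and store into a single
; read-modify-write rotate, e.g. "roll %cl, (%eax)".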

define void @rotate_left_m32(ptr%pa, i32 %b) {
; X86-LABEL: rotate_left_m32:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %cl, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: rotate_left_m32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    roll %cl, (%rdi)
; X64-NEXT:    retq
  %a = load i32, ptr %pa, align 16
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = lshr i32 %a, %and3
  %or = or i32 %shl, %shr
  store i32 %or, ptr %pa, align 32
  ret void
}

define void @rotate_right_m32(ptr%pa, i32 %b) {
; X86-LABEL: rotate_right_m32:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rorl %cl, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: rotate_right_m32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rorl %cl, (%rdi)
; X64-NEXT:    retq
  %a = load i32, ptr %pa, align 16
  %and = and i32 %b, 31
  %shl = lshr i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = shl i32 %a, %and3
  %or = or i32 %shl, %shr
  store i32 %or, ptr %pa, align 32
  ret void
}

define void @rotate_left_m64(ptr%pa, i64 %b) {
; X86-LABEL: rotate_left_m64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 20
; X86-NEXT:    .cfi_offset %esi, -20
; X86-NEXT:    .cfi_offset %edi, -16
; X86-NEXT:    .cfi_offset %ebx, -12
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %esi
; X86-NEXT:    movl 4(%eax), %ebx
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    shll %cl, %edx
; X86-NEXT:    movl %ebx, %edi
; X86-NEXT:    shldl %cl, %esi, %edi
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB6_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:  .LBB6_2:
; X86-NEXT:    negb %cl
; X86-NEXT:    movl %ebx, %ebp
; X86-NEXT:    shrl %cl, %ebp
; X86-NEXT:    shrdl %cl, %ebx, %esi
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB6_4
; X86-NEXT:  # %bb.3:
; X86-NEXT:    movl %ebp, %esi
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:  .LBB6_4:
; X86-NEXT:    orl %esi, %edx
; X86-NEXT:    orl %ebp, %edi
; X86-NEXT:    movl %edx, (%eax)
; X86-NEXT:    movl %edi, 4(%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: rotate_left_m64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rcx
; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NEXT:    rolq %cl, (%rdi)
; X64-NEXT:    retq
  %a = load i64, ptr %pa, align 16
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = lshr i64 %a, %and3
  %or = or i64 %shl, %shr
  store i64 %or, ptr %pa, align 64
  ret void
}

define void @rotate_right_m64(ptr%pa, i64 %b) {
; X86-LABEL: rotate_right_m64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    pushl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    pushl %edi
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    pushl %esi
; X86-NEXT:    .cfi_def_cfa_offset 20
; X86-NEXT:    .cfi_offset %esi, -20
; X86-NEXT:    .cfi_offset %edi, -16
; X86-NEXT:    .cfi_offset %ebx, -12
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl (%eax), %ebx
; X86-NEXT:    movl 4(%eax), %esi
; X86-NEXT:    movl %esi, %edx
; X86-NEXT:    shrl %cl, %edx
; X86-NEXT:    movl %ebx, %edi
; X86-NEXT:    shrdl %cl, %esi, %edi
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB7_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:  .LBB7_2:
; X86-NEXT:    negb %cl
; X86-NEXT:    movl %ebx, %ebp
; X86-NEXT:    shll %cl, %ebp
; X86-NEXT:    shldl %cl, %ebx, %esi
; X86-NEXT:    testb $32, %cl
; X86-NEXT:    je .LBB7_4
; X86-NEXT:  # %bb.3:
; X86-NEXT:    movl %ebp, %esi
; X86-NEXT:    xorl %ebp, %ebp
; X86-NEXT:  .LBB7_4:
; X86-NEXT:    orl %ebp, %edi
; X86-NEXT:    orl %esi, %edx
; X86-NEXT:    movl %edi, (%eax)
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    popl %edi
; X86-NEXT:    .cfi_def_cfa_offset 12
; X86-NEXT:    popl %ebx
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    popl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 4
; X86-NEXT:    retl
;
; X64-LABEL: rotate_right_m64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rcx
; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-NEXT:    rorq %cl, (%rdi)
; X64-NEXT:    retq
  %a = load i64, ptr %pa, align 16
  %and = and i64 %b, 63
  %shl = lshr i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = shl i64 %a, %and3
  %or = or i64 %shl, %shr
  store i64 %or, ptr %pa, align 64
  ret void
}

; The next 8 tests mask the narrow-width shift amounts; those masks are redundant and should be eliminated.
; These patterns are produced by instcombine after r310509.
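;
; A source-level sketch of the narrow-width pattern (a hypothetical C helper,
; assuming an 8-bit value rotated by a 32-bit amount) is:
;
;   unsigned char rotl8(unsigned char x, unsigned amount) {
;     unsigned char amt = (unsigned char)amount;
;     return (unsigned char)((x << (amt & 7)) | (x >> ((0u - amt) & 7)));
;   }
;
; For an 8-bit rotate the "& 7" masks carry no extra information, so codegen
; should drop them and emit a plain rolb/rorb by %cl.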

define i8 @rotate_left_8(i8 %x, i32 %amount) {
; X86-LABEL: rotate_left_8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolb %cl, %al
; X86-NEXT:    retl
;
; X64-LABEL: rotate_left_8:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rolb %cl, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shl = shl i8 %x, %maskamt
  %shr = lshr i8 %x, %masksub
  %or = or i8 %shl, %shr
  ret i8 %or
}

define i8 @rotate_right_8(i8 %x, i32 %amount) {
; X86-LABEL: rotate_right_8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rorb %cl, %al
; X86-NEXT:    retl
;
; X64-LABEL: rotate_right_8:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rorb %cl, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shr = lshr i8 %x, %maskamt
  %shl = shl i8 %x, %masksub
  %or = or i8 %shr, %shl
  ret i8 %or
}

define i16 @rotate_left_16(i16 %x, i32 %amount) {
; X86-LABEL: rotate_left_16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolw %cl, %ax
; X86-NEXT:    retl
;
; X64-LABEL: rotate_left_16:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rolw %cl, %ax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shl = shl i16 %x, %maskamt
  %shr = lshr i16 %x, %masksub
  %or = or i16 %shl, %shr
  ret i16 %or
}

define i16 @rotate_right_16(i16 %x, i32 %amount) {
; X86-LABEL: rotate_right_16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rorw %cl, %ax
; X86-NEXT:    retl
;
; X64-LABEL: rotate_right_16:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rorw %cl, %ax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shr = lshr i16 %x, %maskamt
  %shl = shl i16 %x, %masksub
  %or = or i16 %shr, %shl
  ret i16 %or
}

define void @rotate_left_m8(ptr %p, i32 %amount) {
; X86-LABEL: rotate_left_m8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolb %cl, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: rotate_left_m8:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rolb %cl, (%rdi)
; X64-NEXT:    retq
  %x = load i8, ptr %p, align 1
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shl = shl i8 %x, %maskamt
  %shr = lshr i8 %x, %masksub
  %or = or i8 %shl, %shr
  store i8 %or, ptr %p, align 1
  ret void
}

define void @rotate_right_m8(ptr %p, i32 %amount) {
; X86-LABEL: rotate_right_m8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rorb %cl, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: rotate_right_m8:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rorb %cl, (%rdi)
; X64-NEXT:    retq
  %x = load i8, ptr %p, align 1
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shl = shl i8 %x, %masksub
  %shr = lshr i8 %x, %maskamt
  %or = or i8 %shl, %shr
  store i8 %or, ptr %p, align 1
  ret void
}

define void @rotate_left_m16(ptr %p, i32 %amount) {
; X86-LABEL: rotate_left_m16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolw %cl, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: rotate_left_m16:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rolw %cl, (%rdi)
; X64-NEXT:    retq
  %x = load i16, ptr %p, align 1
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shl = shl i16 %x, %maskamt
  %shr = lshr i16 %x, %masksub
  %or = or i16 %shl, %shr
  store i16 %or, ptr %p, align 1
  ret void
}

define void @rotate_right_m16(ptr %p, i32 %amount) {
; X86-LABEL: rotate_right_m16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rorw %cl, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: rotate_right_m16:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    rorw %cl, (%rdi)
; X64-NEXT:    retq
  %x = load i16, ptr %p, align 1
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shl = shl i16 %x, %masksub
  %shr = lshr i16 %x, %maskamt
  %or = or i16 %shl, %shr
  store i16 %or, ptr %p, align 1
  ret void
}

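; The rotate_demanded_bits tests below mask the rotate amount with values other
; than OpSize-1 (30 and 23) or derive it from a doubled amount; a rotate should
; still be formed, with the remaining mask or the doubling applied to the
; amount register first (e.g. "andb $30, %cl" or "addb %cl, %cl" before roll).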
define i32 @rotate_demanded_bits(i32, i32) {
; X86-LABEL: rotate_demanded_bits:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andb $30, %cl
; X86-NEXT:    roll %cl, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotate_demanded_bits:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $30, %cl
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    roll %cl, %eax
; X64-NEXT:    retq
  %3 = and i32 %1, 30
  %4 = shl i32 %0, %3
  %5 = sub nsw i32 0, %3
  %6 = and i32 %5, 30
  %7 = lshr i32 %0, %6
  %8 = or i32 %7, %4
  ret i32 %8
}

define i32 @rotate_demanded_bits_2(i32, i32) {
; X86-LABEL: rotate_demanded_bits_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andb $23, %cl
; X86-NEXT:    roll %cl, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotate_demanded_bits_2:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    andb $23, %cl
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    roll %cl, %eax
; X64-NEXT:    retq
  %3 = and i32 %1, 23
  %4 = shl i32 %0, %3
  %5 = sub nsw i32 0, %3
  %6 = and i32 %5, 31
  %7 = lshr i32 %0, %6
  %8 = or i32 %7, %4
  ret i32 %8
}

define i32 @rotate_demanded_bits_3(i32, i32) {
; X86-LABEL: rotate_demanded_bits_3:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addb %cl, %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %cl, %eax
; X86-NEXT:    retl
;
; X64-LABEL: rotate_demanded_bits_3:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    leal (%rsi,%rsi), %ecx
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    roll %cl, %eax
; X64-NEXT:    retq
  %3 = shl i32 %1, 1
  %4 = and i32 %3, 30
  %5 = shl i32 %0, %4
  %6 = sub i32 0, %3
  %7 = and i32 %6, 30
  %8 = lshr i32 %0, %7
  %9 = or i32 %5, %8
  ret i32 %9
}