; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64

; Check that under certain conditions we can factor out a rotate
; from the following idioms:
;   (a*c0) >> s1 | (a*c1)
;   (a/c0) << s1 | (a/c1)
; This targets cases where instcombine has folded a shl/srl/mul/udiv
; with one of the shifts from the rotate idiom

; (i << 3) >> 57 | (i << 10). With x = i << 3 this is (x >> 57) | (x << 7),
; i.e. rotl(x, 7) on i64. The x86-64 checks expect leaq followed by rolq $7;
; the i686 checks expect shldl/shrdl sequences instead of a rotate instruction.
define i64 @rolq_extract_shl(i64 %i) nounwind {
; X86-LABEL: rolq_extract_shl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shldl $3, %edx, %ecx
; X86-NEXT:    shll $3, %eax
; X86-NEXT:    shll $3, %edx
; X86-NEXT:    shrdl $25, %edx, %eax
; X86-NEXT:    shrdl $25, %ecx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: rolq_extract_shl:
; X64:       # %bb.0:
; X64-NEXT:    leaq (,%rdi,8), %rax
; X64-NEXT:    rolq $7, %rax
; X64-NEXT:    retq
  %lhs_mul = shl i64 %i, 3
  %rhs_mul = shl i64 %i, 10
  %lhs_shift = lshr i64 %lhs_mul, 57
  %out = or i64 %lhs_shift, %rhs_mul
  ret i64 %out
}

; (i >> 7) | ((i >> 3) << 12). With x = i >> 3 this is (x >> 4) | (x << 12),
; i.e. rotl(x, 12) on i16. Both targets are expected to emit shrl $3 followed
; by rolw $12.
define i16 @rolw_extract_shrl(i16 %i) nounwind {
; X86-LABEL: rolw_extract_shrl:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $3, %eax
; X86-NEXT:    rolw $12, %ax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: rolw_extract_shrl:
; X64:       # %bb.0:
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    shrl $3, %eax
; X64-NEXT:    rolw $12, %ax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %lhs_div = lshr i16 %i, 7
  %rhs_div = lshr i16 %i, 3
  %rhs_shift = shl i16 %rhs_div, 12
  %out = or i16 %lhs_div, %rhs_shift
  ret i16 %out
}

; (i * 9) >> 25 | (i * 1152). Since 1152 = 9 * 128, with x = i * 9 this is
; (x >> 25) | (x << 7), i.e. rotl(x, 7) on i32. Both targets are expected to
; emit a leal computing i * 9 followed by roll $7.
define i32 @roll_extract_mul(i32 %i) nounwind {
; X86-LABEL: roll_extract_mul:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal (%eax,%eax,8), %eax
; X86-NEXT:    roll $7, %eax
; X86-NEXT:    retl
;
; X64-LABEL: roll_extract_mul:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal (%rdi,%rdi,8), %eax
; X64-NEXT:    roll $7, %eax
; X64-NEXT:    retq
  %lhs_mul = mul i32 %i, 9
  %rhs_mul = mul i32 %i, 1152
  %lhs_shift = lshr i32 %lhs_mul, 25
  %out = or i32 %lhs_shift, %rhs_mul
  ret i32 %out
}

; (i / 3) << 4 | (i / 48). Since 48 = 3 * 16, with x = i / 3 this is
; (x << 4) | (x >> 4), i.e. rotl(x, 4) on i8. Both targets are expected to
; emit the divide-by-3 as imull $171 + shrl $9, followed by rolb $4.
define i8 @rolb_extract_udiv(i8 %i) nounwind {
; X86-LABEL: rolb_extract_udiv:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull $171, %eax, %eax
; X86-NEXT:    shrl $9, %eax
; X86-NEXT:    rolb $4, %al
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: rolb_extract_udiv:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    imull $171, %eax, %eax
; X64-NEXT:    shrl $9, %eax
; X64-NEXT:    rolb $4, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %lhs_div = udiv i8 %i, 3
  %rhs_div = udiv i8 %i, 48
  %lhs_shift = shl i8 %lhs_div, 4
  %out = or i8 %lhs_shift, %rhs_div
  ret i8 %out
}

; Same multiply idiom as roll_extract_mul on i64, but the (i * 1152) side is
; masked with 160 before the or. The x86-64 checks expect leaq, rolq $7 and a
; trailing movzbl; the i686 checks contain no rotate instruction.
define i64 @rolq_extract_mul_with_mask(i64 %i) nounwind {
; X86-LABEL: rolq_extract_mul_with_mask:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal (%eax,%eax,8), %ecx
; X86-NEXT:    movl $9, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    addl %ecx, %edx
; X86-NEXT:    shrdl $25, %eax, %edx
; X86-NEXT:    movzbl %dl, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: rolq_extract_mul_with_mask:
; X64:       # %bb.0:
; X64-NEXT:    leaq (%rdi,%rdi,8), %rax
; X64-NEXT:    rolq $7, %rax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
  %lhs_mul = mul i64 %i, 1152
  %rhs_mul = mul i64 %i, 9
  %lhs_and = and i64 %lhs_mul, 160
  %rhs_shift = lshr i64 %rhs_mul, 57
  %out = or i64 %lhs_and, %rhs_shift
  ret i64 %out
}

; Result would undershift
; Negative test: (i << 5) >> 57 | (i << 10). Factoring out x = i << 5 leaves
; (x >> 57) | (x << 5), and 57 + 5 != 64, so this is not a rotate of x. The
; checks on both targets expect plain shift/and/or code with no rotate.
define i64 @no_extract_shl(i64 %i) nounwind {
; X86-LABEL: no_extract_shl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    shldl $10, %ecx, %edx
; X86-NEXT:    shll $10, %ecx
; X86-NEXT:    shrl $20, %eax
; X86-NEXT:    andl $127, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_shl:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $10, %rax
; X64-NEXT:    shrq $52, %rdi
; X64-NEXT:    andl $127, %edi
; X64-NEXT:    orq %rdi, %rax
; X64-NEXT:    retq
  %lhs_mul = shl i64 %i, 5
  %rhs_mul = shl i64 %i, 10
  %lhs_shift = lshr i64 %lhs_mul, 57
  %out = or i64 %lhs_shift, %rhs_mul
  ret i64 %out
}

; Result would overshift
; Negative test: (i >> 3) << 28 | (i >> 9). Factoring out x = i >> 3 leaves
; (x << 28) | (x >> 6), and 28 + 6 != 32, so this is not a rotate of x. The
; checks on both targets expect shift/and/or code with no rotate.
define i32 @no_extract_shrl(i32 %i) nounwind {
; X86-LABEL: no_extract_shrl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %ecx
; X86-NEXT:    shrl $9, %ecx
; X86-NEXT:    andl $-8, %eax
; X86-NEXT:    shll $25, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_shrl:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrl $9, %eax
; X64-NEXT:    andl $-8, %edi
; X64-NEXT:    shll $25, %edi
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %lhs_div = lshr i32 %i, 3
  %rhs_div = lshr i32 %i, 9
  %lhs_shift = shl i32 %lhs_div, 28
  %out = or i32 %lhs_shift, %rhs_div
  ret i32 %out
}

; Can factor 128 from 2304, but result is 18 instead of 9
; Negative test on i16: (i * 2304) | (i * 9) >> 9. A rotate paired with the
; lshr by 9 would need a shl by 7 (x128) of i * 9, but 2304 / 128 = 18. The
; checks on both targets expect two leal multiplies, a shift and an or.
define i16 @no_extract_mul(i16 %i) nounwind {
; X86-LABEL: no_extract_mul:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal (%eax,%eax,8), %ecx
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    leal (%eax,%eax,8), %edx
; X86-NEXT:    movzwl %cx, %eax
; X86-NEXT:    shrl $9, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_mul:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal (%rdi,%rdi,8), %eax
; X64-NEXT:    # kill: def $edi killed $edi killed $rdi def $rdi
; X64-NEXT:    shll $8, %edi
; X64-NEXT:    leal (%rdi,%rdi,8), %ecx
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    shrl $9, %eax
; X64-NEXT:    orl %ecx, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %lhs_mul = mul i16 %i, 2304
  %rhs_mul = mul i16 %i, 9
  %rhs_shift = lshr i16 %rhs_mul, 9
  %out = or i16 %lhs_mul, %rhs_shift
  ret i16 %out
}

; Can't evenly factor 16 from 49
; Negative test on i8: (i / 3) << 4 | (i / 49). Unlike the divisor 48 used in
; rolb_extract_udiv, 49 is not 3 * 16, so i / 49 is not (i / 3) >> 4. The
; checks on both targets expect two separate divide expansions and no rotate.
define i8 @no_extract_udiv(i8 %i) nounwind {
; X86-LABEL: no_extract_udiv:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    imull $171, %eax, %ecx
; X86-NEXT:    imull $79, %eax, %edx
; X86-NEXT:    subb %dh, %al
; X86-NEXT:    shrb %al
; X86-NEXT:    addb %dh, %al
; X86-NEXT:    shrb $5, %al
; X86-NEXT:    shlb $3, %ch
; X86-NEXT:    orb %al, %ch
; X86-NEXT:    andb $-9, %ch
; X86-NEXT:    movb %ch, %al
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_udiv:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %ecx
; X64-NEXT:    imull $171, %ecx, %eax
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    imull $79, %ecx, %edx
; X64-NEXT:    shrl $8, %edx
; X64-NEXT:    subb %dl, %cl
; X64-NEXT:    shrb %cl
; X64-NEXT:    addb %dl, %cl
; X64-NEXT:    shrb $5, %cl
; X64-NEXT:    shlb $3, %al
; X64-NEXT:    orb %cl, %al
; X64-NEXT:    andb $-9, %al
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %lhs_div = udiv i8 %i, 3
  %rhs_div = udiv i8 %i, 49
  %lhs_shift = shl i8 %lhs_div, 4
  %out = or i8 %lhs_shift, %rhs_div
  ret i8 %out
}

; DAGCombiner transforms shl X, 1 into add X, X.
; (i + i) | (i >> 31). With the add acting as a shift left by one, this is
; rotl(i, 1) on i32. Both targets are expected to emit a single roll (the
; rotate-by-one form without an immediate).
define i32 @extract_add_1(i32 %i) nounwind {
; X86-LABEL: extract_add_1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %eax
; X86-NEXT:    retl
;
; X64-LABEL: extract_add_1:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    roll %eax
; X64-NEXT:    retq
  %ii = add i32 %i, %i
  %rhs = lshr i32 %i, 31
  %out = or i32 %ii, %rhs
  ret i32 %out
}

; Same pattern as extract_add_1 with the operands of the or swapped; the
; expected output is the same single roll on both targets.
define i32 @extract_add_1_comut(i32 %i) nounwind {
; X86-LABEL: extract_add_1_comut:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    roll %eax
; X86-NEXT:    retl
;
; X64-LABEL: extract_add_1_comut:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    roll %eax
; X64-NEXT:    retq
  %ii = add i32 %i, %i
  %lhs = lshr i32 %i, 31
  %out = or i32 %lhs, %ii
  ret i32 %out
}

; Negative test: (i + i) | (i >> 27). The shift amounts 1 and 27 do not sum to
; 32, so this is not a rotate. Both targets are expected to emit leal, shrl $27
; and orl.
define i32 @no_extract_add_1(i32 %i) nounwind {
; X86-LABEL: no_extract_add_1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal (%eax,%eax), %ecx
; X86-NEXT:    shrl $27, %eax
; X86-NEXT:    orl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: no_extract_add_1:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    leal (%rdi,%rdi), %eax
; X64-NEXT:    shrl $27, %edi
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    retq
  %ii = add i32 %i, %i
  %rhs = lshr i32 %i, 27
  %out = or i32 %ii, %rhs
  ret i32 %out
}