; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-FAST
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X86,X86-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X64,X64-SLOW

; Codegen coverage for the llvm.fshl.* (funnel shift left) intrinsics on
; 32-bit and 64-bit x86. Each target is run twice: once with default tuning
; and once with the slow-shld attribute. Functions whose expected output is
; the same for both tunings are checked with the shared X86 / X64 prefixes;
; the others use the -FAST / -SLOW variants.

declare i8 @llvm.fshl.i8(i8, i8, i8) nounwind readnone
declare i16 @llvm.fshl.i16(i16, i16, i16) nounwind readnone
declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
declare i64 @llvm.fshl.i64(i64, i64, i64) nounwind readnone
declare i128 @llvm.fshl.i128(i128, i128, i128) nounwind readnone

;
; Variable Funnel Shift
;

; i8 funnel shift by a variable amount %z. The expected code is shared by
; both tunings: %x and %y are concatenated in a 32-bit register, the amount
; is masked with 7, the pair is shifted left and bits 8..15 are returned.
define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
; X86-LABEL: var_shift_i8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    andb $7, %cl
; X86-NEXT:    shll %cl, %eax
; X86-NEXT:    movb %ah, %al
; X86-NEXT:    retl
;
; X64-LABEL: var_shift_i8:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    shll $8, %edi
; X64-NEXT:    movzbl %sil, %eax
; X64-NEXT:    orl %edi, %eax
; X64-NEXT:    andb $7, %cl
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shll %cl, %eax
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 %z)
  ret i8 %tmp
}

; i16 funnel shift by a variable amount. The default-tuning runs expect a
; single shldw with the amount masked to 15; the slow-shld runs expect the
; operands to be concatenated in a 32-bit register, shifted, and the upper
; half extracted with a right shift by 16.
define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
; X86-FAST-LABEL: var_shift_i16:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    andb $15, %cl
; X86-FAST-NEXT:    shldw %cl, %dx, %ax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: var_shift_i16:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    shll $16, %eax
; X86-SLOW-NEXT:    orl %edx, %eax
; X86-SLOW-NEXT:    andb $15, %cl
; X86-SLOW-NEXT:    shll %cl, %eax
; X86-SLOW-NEXT:    shrl $16, %eax
; X86-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: var_shift_i16:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %edx, %ecx
; X64-FAST-NEXT:    movl %edi, %eax
; X64-FAST-NEXT:    andb $15, %cl
; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT:    shldw %cl, %si, %ax
; X64-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: var_shift_i16:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movl %edx, %ecx
; X64-SLOW-NEXT:    shll $16, %edi
; X64-SLOW-NEXT:    movzwl %si, %eax
; X64-SLOW-NEXT:    orl %edi, %eax
; X64-SLOW-NEXT:    andb $15, %cl
; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT:    shll %cl, %eax
; X64-SLOW-NEXT:    shrl $16, %eax
; X64-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
  ret i16 %tmp
}

; i32 funnel shift by a variable amount. Default tuning expects one shldl;
; slow-shld expects the expanded form: shift %x left by the amount, shift %y
; right by one and then by the bitwise-inverted amount, and OR the two.
define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-FAST-LABEL: var_shift_i32:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    shldl %cl, %edx, %eax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: var_shift_i32:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    shll %cl, %edx
; X86-SLOW-NEXT:    notb %cl
; X86-SLOW-NEXT:    shrl %eax
; X86-SLOW-NEXT:    shrl %cl, %eax
; X86-SLOW-NEXT:    orl %edx, %eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: var_shift_i32:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %edx, %ecx
; X64-FAST-NEXT:    movl %edi, %eax
; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT:    shldl %cl, %esi, %eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: var_shift_i32:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movl %edx, %ecx
; X64-SLOW-NEXT:    movl %esi, %eax
; X64-SLOW-NEXT:    shll %cl, %edi
; X64-SLOW-NEXT:    shrl %eax
; X64-SLOW-NEXT:    notb %cl
; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT:    shrl %cl, %eax
; X64-SLOW-NEXT:    orl %edi, %eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

; Same IR as var_shift_i32 but with the optsize attribute. The expected code
; is checked under the shared prefixes, i.e. a single shldl is expected even
; when the slow-shld attribute is set.
define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
; X86-LABEL: var_shift_i32_optsize:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shldl %cl, %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: var_shift_i32_optsize:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shldl %cl, %esi, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

; Same IR as var_shift_i32 but annotated with !prof !14 (a function entry
; count of 0, see the profile summary metadata at the end of the file)
; instead of optsize. The expected code matches the optsize variant: a
; single shldl for both tunings.
define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
; X86-LABEL: var_shift_i32_pgso:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shldl %cl, %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: var_shift_i32_pgso:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shldl %cl, %esi, %eax
; X64-NEXT:    retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %tmp
}

; i64 funnel shift by a variable amount. On the 32-bit target the expected
; code tests bit 5 of the amount (testb $32) to choose which 32-bit pieces
; feed the two per-half shifts; on the 64-bit target it is a single shldq
; (default tuning) or the shl/shr/not/or expansion (slow-shld).
define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-FAST-LABEL: var_shift_i64:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    pushl %edi
; X86-FAST-NEXT:    pushl %esi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    testb $32, %cl
; X86-FAST-NEXT:    jne .LBB5_1
; X86-FAST-NEXT:  # %bb.2:
; X86-FAST-NEXT:    movl %edx, %edi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    jmp .LBB5_3
; X86-FAST-NEXT:  .LBB5_1:
; X86-FAST-NEXT:    movl %esi, %edi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT:  .LBB5_3:
; X86-FAST-NEXT:    movl %edi, %eax
; X86-FAST-NEXT:    shldl %cl, %esi, %eax
; X86-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-FAST-NEXT:    shldl %cl, %edi, %edx
; X86-FAST-NEXT:    popl %esi
; X86-FAST-NEXT:    popl %edi
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: var_shift_i64:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    pushl %ebx
; X86-SLOW-NEXT:    pushl %edi
; X86-SLOW-NEXT:    pushl %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT:    testb $32, %bl
; X86-SLOW-NEXT:    jne .LBB5_1
; X86-SLOW-NEXT:  # %bb.2:
; X86-SLOW-NEXT:    movl %edx, %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    jmp .LBB5_3
; X86-SLOW-NEXT:  .LBB5_1:
; X86-SLOW-NEXT:    movl %eax, %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:  .LBB5_3:
; X86-SLOW-NEXT:    movl %esi, %edi
; X86-SLOW-NEXT:    movl %ebx, %ecx
; X86-SLOW-NEXT:    shll %cl, %edi
; X86-SLOW-NEXT:    shrl %eax
; X86-SLOW-NEXT:    notb %cl
; X86-SLOW-NEXT:    shrl %cl, %eax
; X86-SLOW-NEXT:    orl %edi, %eax
; X86-SLOW-NEXT:    shrl %esi
; X86-SLOW-NEXT:    shrl %cl, %esi
; X86-SLOW-NEXT:    movl %ebx, %ecx
; X86-SLOW-NEXT:    shll %cl, %edx
; X86-SLOW-NEXT:    orl %esi, %edx
; X86-SLOW-NEXT:    popl %esi
; X86-SLOW-NEXT:    popl %edi
; X86-SLOW-NEXT:    popl %ebx
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: var_shift_i64:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movq %rdx, %rcx
; X64-FAST-NEXT:    movq %rdi, %rax
; X64-FAST-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-FAST-NEXT:    shldq %cl, %rsi, %rax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: var_shift_i64:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movq %rdx, %rcx
; X64-SLOW-NEXT:    movq %rsi, %rax
; X64-SLOW-NEXT:    shlq %cl, %rdi
; X64-SLOW-NEXT:    shrq %rax
; X64-SLOW-NEXT:    notb %cl
; X64-SLOW-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-SLOW-NEXT:    shrq %cl, %rax
; X64-SLOW-NEXT:    orq %rdi, %rax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %tmp
}

; i128 funnel shift by a variable amount. On the 32-bit target the expected
; code tests bits 6 and 5 of the amount (testb $64 / testb $32) to pick the
; 32-bit pieces, stores the four result words through a pointer and returns
; with `retl $4`. On the 64-bit target the 64-bit halves are selected with
; cmov on testb $64 and then shifted per half.
define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-FAST-LABEL: var_shift_i128:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    pushl %ebp
; X86-FAST-NEXT:    pushl %ebx
; X86-FAST-NEXT:    pushl %edi
; X86-FAST-NEXT:    pushl %esi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    testb $64, %cl
; X86-FAST-NEXT:    jne .LBB6_1
; X86-FAST-NEXT:  # %bb.2:
; X86-FAST-NEXT:    movl %ebx, %ebp
; X86-FAST-NEXT:    movl %esi, %ebx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT:    movl %edi, %eax
; X86-FAST-NEXT:    movl %edx, %edi
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    testb $32, %cl
; X86-FAST-NEXT:    je .LBB6_5
; X86-FAST-NEXT:  .LBB6_4:
; X86-FAST-NEXT:    movl %esi, %edx
; X86-FAST-NEXT:    movl %edi, %esi
; X86-FAST-NEXT:    movl %ebx, %edi
; X86-FAST-NEXT:    movl %eax, %ebx
; X86-FAST-NEXT:    jmp .LBB6_6
; X86-FAST-NEXT:  .LBB6_1:
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    testb $32, %cl
; X86-FAST-NEXT:    jne .LBB6_4
; X86-FAST-NEXT:  .LBB6_5:
; X86-FAST-NEXT:    movl %eax, %ebp
; X86-FAST-NEXT:  .LBB6_6:
; X86-FAST-NEXT:    movl %ebx, %eax
; X86-FAST-NEXT:    shldl %cl, %ebp, %eax
; X86-FAST-NEXT:    movl %edi, %ebp
; X86-FAST-NEXT:    shldl %cl, %ebx, %ebp
; X86-FAST-NEXT:    movl %esi, %ebx
; X86-FAST-NEXT:    shldl %cl, %edi, %ebx
; X86-FAST-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-FAST-NEXT:    shldl %cl, %esi, %edx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movl %edx, 12(%ecx)
; X86-FAST-NEXT:    movl %ebx, 8(%ecx)
; X86-FAST-NEXT:    movl %ebp, 4(%ecx)
; X86-FAST-NEXT:    movl %eax, (%ecx)
; X86-FAST-NEXT:    movl %ecx, %eax
; X86-FAST-NEXT:    popl %esi
; X86-FAST-NEXT:    popl %edi
; X86-FAST-NEXT:    popl %ebx
; X86-FAST-NEXT:    popl %ebp
; X86-FAST-NEXT:    retl $4
;
; X86-SLOW-LABEL: var_shift_i128:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    pushl %ebp
; X86-SLOW-NEXT:    pushl %ebx
; X86-SLOW-NEXT:    pushl %edi
; X86-SLOW-NEXT:    pushl %esi
; X86-SLOW-NEXT:    pushl %eax
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    testb $64, %al
; X86-SLOW-NEXT:    jne .LBB6_1
; X86-SLOW-NEXT:  # %bb.2:
; X86-SLOW-NEXT:    movl %edx, %ebp
; X86-SLOW-NEXT:    movl %ebx, %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT:    movl %edi, %ecx
; X86-SLOW-NEXT:    movl %esi, %edi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT:    testb $32, %al
; X86-SLOW-NEXT:    je .LBB6_5
; X86-SLOW-NEXT:  .LBB6_4:
; X86-SLOW-NEXT:    movl %ebx, (%esp) # 4-byte Spill
; X86-SLOW-NEXT:    movl %edi, %ebx
; X86-SLOW-NEXT:    movl %edx, %edi
; X86-SLOW-NEXT:    movl %ecx, %edx
; X86-SLOW-NEXT:    jmp .LBB6_6
; X86-SLOW-NEXT:  .LBB6_1:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    testb $32, %al
; X86-SLOW-NEXT:    jne .LBB6_4
; X86-SLOW-NEXT:  .LBB6_5:
; X86-SLOW-NEXT:    movl %ecx, %ebp
; X86-SLOW-NEXT:    movl %esi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT:  .LBB6_6:
; X86-SLOW-NEXT:    movl %edx, %esi
; X86-SLOW-NEXT:    movl %eax, %ecx
; X86-SLOW-NEXT:    shll %cl, %esi
; X86-SLOW-NEXT:    shrl %ebp
; X86-SLOW-NEXT:    movb %al, %ch
; X86-SLOW-NEXT:    notb %ch
; X86-SLOW-NEXT:    movb %ch, %cl
; X86-SLOW-NEXT:    shrl %cl, %ebp
; X86-SLOW-NEXT:    orl %esi, %ebp
; X86-SLOW-NEXT:    movl %edi, %esi
; X86-SLOW-NEXT:    movb %al, %cl
; X86-SLOW-NEXT:    shll %cl, %esi
; X86-SLOW-NEXT:    shrl %edx
; X86-SLOW-NEXT:    movb %ch, %cl
; X86-SLOW-NEXT:    shrl %cl, %edx
; X86-SLOW-NEXT:    orl %esi, %edx
; X86-SLOW-NEXT:    movl %ebx, %esi
; X86-SLOW-NEXT:    movb %al, %cl
; X86-SLOW-NEXT:    shll %cl, %esi
; X86-SLOW-NEXT:    shrl %edi
; X86-SLOW-NEXT:    movb %ch, %cl
; X86-SLOW-NEXT:    shrl %cl, %edi
; X86-SLOW-NEXT:    orl %esi, %edi
; X86-SLOW-NEXT:    movb %al, %cl
; X86-SLOW-NEXT:    movl (%esp), %eax # 4-byte Reload
; X86-SLOW-NEXT:    shll %cl, %eax
; X86-SLOW-NEXT:    shrl %ebx
; X86-SLOW-NEXT:    movb %ch, %cl
; X86-SLOW-NEXT:    shrl %cl, %ebx
; X86-SLOW-NEXT:    orl %eax, %ebx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movl %ebx, 12(%eax)
; X86-SLOW-NEXT:    movl %edi, 8(%eax)
; X86-SLOW-NEXT:    movl %edx, 4(%eax)
; X86-SLOW-NEXT:    movl %ebp, (%eax)
; X86-SLOW-NEXT:    addl $4, %esp
; X86-SLOW-NEXT:    popl %esi
; X86-SLOW-NEXT:    popl %edi
; X86-SLOW-NEXT:    popl %ebx
; X86-SLOW-NEXT:    popl %ebp
; X86-SLOW-NEXT:    retl $4
;
; X64-FAST-LABEL: var_shift_i128:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    testb $64, %r8b
; X64-FAST-NEXT:    cmovneq %rdi, %rsi
; X64-FAST-NEXT:    cmoveq %rcx, %rdx
; X64-FAST-NEXT:    cmovneq %rcx, %rdi
; X64-FAST-NEXT:    movq %rdi, %rax
; X64-FAST-NEXT:    movl %r8d, %ecx
; X64-FAST-NEXT:    shldq %cl, %rdx, %rax
; X64-FAST-NEXT:    shldq %cl, %rdi, %rsi
; X64-FAST-NEXT:    movq %rsi, %rdx
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: var_shift_i128:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    testb $64, %r8b
; X64-SLOW-NEXT:    cmovneq %rdi, %rsi
; X64-SLOW-NEXT:    cmoveq %rcx, %rdx
; X64-SLOW-NEXT:    cmovneq %rcx, %rdi
; X64-SLOW-NEXT:    movq %rdi, %rax
; X64-SLOW-NEXT:    movl %r8d, %ecx
; X64-SLOW-NEXT:    shlq %cl, %rax
; X64-SLOW-NEXT:    shrq %rdx
; X64-SLOW-NEXT:    movl %r8d, %r9d
; X64-SLOW-NEXT:    notb %r9b
; X64-SLOW-NEXT:    movl %r9d, %ecx
; X64-SLOW-NEXT:    shrq %cl, %rdx
; X64-SLOW-NEXT:    orq %rdx, %rax
; X64-SLOW-NEXT:    movl %r8d, %ecx
; X64-SLOW-NEXT:    shlq %cl, %rsi
; X64-SLOW-NEXT:    shrq %rdi
; X64-SLOW-NEXT:    movl %r9d, %ecx
; X64-SLOW-NEXT:    shrq %cl, %rdi
; X64-SLOW-NEXT:    orq %rsi, %rdi
; X64-SLOW-NEXT:    movq %rdi, %rdx
; X64-SLOW-NEXT:    retq
  %tmp = tail call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %tmp
}

;
; Const Funnel Shift
;

; i8 funnel shift by the constant 7. Both tunings expect %x shifted left by
; 7 combined with %y shifted right by 1 (8 - 7), with no shld involved.
define i8 @const_shift_i8(i8 %x, i8 %y) nounwind {
; X86-LABEL: const_shift_i8:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shrb %cl
; X86-NEXT:    shlb $7, %al
; X86-NEXT:    orb %cl, %al
; X86-NEXT:    retl
;
; X64-LABEL: const_shift_i8:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-NEXT:    shrb %sil
; X64-NEXT:    shlb $7, %dil
; X64-NEXT:    leal (%rdi,%rsi), %eax
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq
  %tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 7)
  ret i8 %tmp
}

; i16 funnel shift by the constant 7. Default tuning expects `shldw $7`;
; slow-shld expects a left shift by 7 combined with a right shift by 9.
define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
; X86-FAST-LABEL: const_shift_i16:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    shldw $7, %cx, %ax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: const_shift_i16:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    shrl $9, %ecx
; X86-SLOW-NEXT:    shll $7, %eax
; X86-SLOW-NEXT:    orl %ecx, %eax
; X86-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: const_shift_i16:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %edi, %eax
; X64-FAST-NEXT:    shldw $7, %si, %ax
; X64-FAST-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: const_shift_i16:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    movzwl %si, %eax
; X64-SLOW-NEXT:    shll $7, %edi
; X64-SLOW-NEXT:    shrl $9, %eax
; X64-SLOW-NEXT:    orl %edi, %eax
; X64-SLOW-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 7)
  ret i16 %tmp
}

; i32 funnel shift by the constant 7. Default tuning expects `shldl $7`;
; slow-shld expects a left shift by 7 combined with a right shift by 25.
define i32 @const_shift_i32(i32 %x, i32 %y) nounwind {
; X86-FAST-LABEL: const_shift_i32:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    shldl $7, %ecx, %eax
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: const_shift_i32:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    shrl $25, %ecx
; X86-SLOW-NEXT:    shll $7, %eax
; X86-SLOW-NEXT:    orl %ecx, %eax
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: const_shift_i32:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movl %edi, %eax
; X64-FAST-NEXT:    shldl $7, %esi, %eax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: const_shift_i32:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-SLOW-NEXT:    # kill: def $edi killed $edi def $rdi
; X64-SLOW-NEXT:    shrl $25, %esi
; X64-SLOW-NEXT:    shll $7, %edi
; X64-SLOW-NEXT:    leal (%rdi,%rsi), %eax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
  ret i32 %tmp
}

; i64 funnel shift by the constant 7. On the 32-bit target the result is
; produced as two 32-bit halves (shrdl $25 + shldl $7 with default tuning,
; plain shift/or pairs with slow-shld); on the 64-bit target it is either
; `shldq $7` or a left shift by 7 combined with a right shift by 57.
define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
; X86-FAST-LABEL: const_shift_i64:
; X86-FAST:       # %bb.0:
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-FAST-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT:    shrdl $25, %ecx, %eax
; X86-FAST-NEXT:    shldl $7, %ecx, %edx
; X86-FAST-NEXT:    retl
;
; X86-SLOW-LABEL: const_shift_i64:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    pushl %esi
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT:    shrl $25, %esi
; X86-SLOW-NEXT:    movl %ecx, %eax
; X86-SLOW-NEXT:    shll $7, %eax
; X86-SLOW-NEXT:    orl %esi, %eax
; X86-SLOW-NEXT:    shrl $25, %ecx
; X86-SLOW-NEXT:    shll $7, %edx
; X86-SLOW-NEXT:    orl %ecx, %edx
; X86-SLOW-NEXT:    popl %esi
; X86-SLOW-NEXT:    retl
;
; X64-FAST-LABEL: const_shift_i64:
; X64-FAST:       # %bb.0:
; X64-FAST-NEXT:    movq %rdi, %rax
; X64-FAST-NEXT:    shldq $7, %rsi, %rax
; X64-FAST-NEXT:    retq
;
; X64-SLOW-LABEL: const_shift_i64:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    shrq $57, %rsi
; X64-SLOW-NEXT:    shlq $7, %rdi
; X64-SLOW-NEXT:    leaq (%rdi,%rsi), %rax
; X64-SLOW-NEXT:    retq
  %tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 7)
  ret i64 %tmp
}

;
; Combine Consecutive Loads
;

; Funnel shift of two adjacent i8 loads (p[1] as the high operand, p[0] as
; the low operand) by 8. The expected code is just a byte load from offset
; 1, i.e. the first operand unchanged; per the LangRef the shift amount is
; taken modulo the bit width, so 8 acts as 0 here.
define i8 @combine_fshl_load_i8(ptr %p) nounwind {
; X86-LABEL: combine_fshl_load_i8:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl 1(%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshl_load_i8:
; X64:       # %bb.0:
; X64-NEXT:    movzbl 1(%rdi), %eax
; X64-NEXT:    retq
  %p1 = getelementptr i8, ptr %p, i32 1
  %ld0 = load i8, ptr%p
  %ld1 = load i8, ptr%p1
  %res = call i8 @llvm.fshl.i8(i8 %ld1, i8 %ld0, i8 8)
  ret i8 %res
}

; Funnel shift of two adjacent i16 loads (element 1 high, element 0 low) by
; 8 bits. The expected code is a single 16-bit load from byte offset 1.
define i16 @combine_fshl_load_i16(ptr %p) nounwind {
; X86-LABEL: combine_fshl_load_i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl 1(%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshl_load_i16:
; X64:       # %bb.0:
; X64-NEXT:    movzwl 1(%rdi), %eax
; X64-NEXT:    retq
  %p1 = getelementptr i16, ptr %p, i32 1
  %ld0 = load i16, ptr%p
  %ld1 = load i16, ptr%p1
  %res = call i16 @llvm.fshl.i16(i16 %ld1, i16 %ld0, i16 8)
  ret i16 %res
}

; Funnel shift of two adjacent i32 loads (element 3 high, element 2 low,
; i.e. byte offsets 12 and 8) by 8 bits. The expected code is a single
; 32-bit load from byte offset 11.
define i32 @combine_fshl_load_i32(ptr %p) nounwind {
; X86-LABEL: combine_fshl_load_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl 11(%eax), %eax
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshl_load_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl 11(%rdi), %eax
; X64-NEXT:    retq
  %p0 = getelementptr i32, ptr %p, i32 2
  %p1 = getelementptr i32, ptr %p, i32 3
  %ld0 = load i32, ptr%p0
  %ld1 = load i32, ptr%p1
  %res = call i32 @llvm.fshl.i32(i32 %ld1, i32 %ld0, i32 8)
  ret i32 %res
}

; Funnel shift of two adjacent i64 loads (element 2 high, element 1 low,
; i.e. byte offsets 16 and 8) by 24 bits. The expected code is a single
; 64-bit load from byte offset 13 on the 64-bit target, and two 32-bit loads
; from offsets 13 and 17 on the 32-bit target.
define i64 @combine_fshl_load_i64(ptr %p) nounwind {
; X86-LABEL: combine_fshl_load_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl 13(%ecx), %eax
; X86-NEXT:    movl 17(%ecx), %edx
; X86-NEXT:    retl
;
; X64-LABEL: combine_fshl_load_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq 13(%rdi), %rax
; X64-NEXT:    retq
  %p0 = getelementptr i64, ptr %p, i64 1
  %p1 = getelementptr i64, ptr %p, i64 2
  %ld0 = load i64, ptr%p0
  %ld1 = load i64, ptr%p1
  %res = call i64 @llvm.fshl.i64(i64 %ld1, i64 %ld0, i64 24)
  ret i64 %res
}

; Module-level profile summary (!0 .. !13) plus a function entry count of 0
; (!14). !14 is attached to var_shift_i32_pgso via !prof.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}