; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,X86-SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,X64-AVX,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64-AVX,X64-VBMI2

; Codegen tests for the funnel-shift intrinsics (llvm.fshl / llvm.fshr) on x86.
; Do not hand-edit the CHECK lines; regenerate them with the script named above.

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare i128 @llvm.fshl.i128(i128, i128, i128)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-SSE2-LABEL: fshl_i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %esi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; On i686 the i64 funnel shift is split into 32-bit halves: bit 5 of the amount
; selects the operands (testb $32 + cmovs), then two shldl ops build the result.

define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-SSE2-LABEL: fshl_i64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    movl %edx, %edi
; X86-SSE2-NEXT:    cmovnel %esi, %edi
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %esi, %eax
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %edi, %edx
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movq %rdx, %rcx
; X64-AVX-NEXT:    movq %rdi, %rax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX-NEXT:    shldq %cl, %rsi, %rax
; X64-AVX-NEXT:    retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; i128 expands the same way one level up: testb $64 selects 64-bit halves on
; x86-64 (and testb $64 / testb $32 select 32-bit quarters on i686), followed
; by a cascade of double-precision shifts. The i686 result is returned through
; the sret pointer (retl $4).

define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SSE2-LABEL: fshl_i128:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    testb $64, %cl
; X86-SSE2-NEXT:    movl %esi, %eax
; X86-SSE2-NEXT:    cmovnel %ebx, %eax
; X86-SSE2-NEXT:    movl %edx, %ebp
; X86-SSE2-NEXT:    cmovnel %edi, %ebp
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    cmovnel {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    cmovel {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    cmovnel %esi, %edx
; X86-SSE2-NEXT:    cmovnel %ebp, %esi
; X86-SSE2-NEXT:    cmovnel %eax, %ebp
; X86-SSE2-NEXT:    cmovel %edi, %ebx
; X86-SSE2-NEXT:    cmovel %eax, %edi
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %ebx, %eax
; X86-SSE2-NEXT:    movl %ebp, %ebx
; X86-SSE2-NEXT:    shldl %cl, %edi, %ebx
; X86-SSE2-NEXT:    movl %esi, %edi
; X86-SSE2-NEXT:    shldl %cl, %ebp, %edi
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %esi, %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl %edx, 12(%ecx)
; X86-SSE2-NEXT:    movl %edi, 8(%ecx)
; X86-SSE2-NEXT:    movl %ebx, 4(%ecx)
; X86-SSE2-NEXT:    movl %eax, (%ecx)
; X86-SSE2-NEXT:    movl %ecx, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    retl $4
;
; X64-AVX-LABEL: fshl_i128:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    testb $64, %r8b
; X64-AVX-NEXT:    cmovneq %rdi, %rsi
; X64-AVX-NEXT:    cmoveq %rcx, %rdx
; X64-AVX-NEXT:    cmovneq %rcx, %rdi
; X64-AVX-NEXT:    movq %rdi, %rax
; X64-AVX-NEXT:    movl %r8d, %ecx
; X64-AVX-NEXT:    shldq %cl, %rdx, %rax
; X64-AVX-NEXT:    shldq %cl, %rdi, %rsi
; X64-AVX-NEXT:    movq %rsi, %rdx
; X64-AVX-NEXT:    retq
  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
; i37 is not a power of two, so the shift amount needs a real urem by 37:
; the i686 path calls __umoddi3, while the x86-64 path expands the urem with a
; reciprocal multiply (mulq by the magic constant, then lea/lea/sub computes
; amt - 37*quotient).
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-LABEL: fshl_i37:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    andl $31, %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    shldl $27, %ebx, %edi
; X86-SSE2-NEXT:    pushl $0
; X86-SSE2-NEXT:    pushl $37
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    calll __umoddi3
; X86-SSE2-NEXT:    addl $16, %esp
; X86-SSE2-NEXT:    movl %eax, %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    jne .LBB3_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    movl %edi, %ebx
; X86-SSE2-NEXT:    movl %esi, %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    jmp .LBB3_3
; X86-SSE2-NEXT:  .LBB3_1:
; X86-SSE2-NEXT:    shll $27, %ebx
; X86-SSE2-NEXT:  .LBB3_3:
; X86-SSE2-NEXT:    movl %edi, %eax
; X86-SSE2-NEXT:    shldl %cl, %ebx, %eax
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %edi, %esi
; X86-SSE2-NEXT:    movl %esi, %edx
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i37:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movq %rdx, %rcx
; X64-AVX-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
; X64-AVX-NEXT:    andq %rdx, %rax
; X64-AVX-NEXT:    movabsq $498560650640798693, %rdx # imm = 0x6EB3E45306EB3E5
; X64-AVX-NEXT:    mulq %rdx
; X64-AVX-NEXT:    leal (%rdx,%rdx,8), %eax
; X64-AVX-NEXT:    leal (%rdx,%rax,4), %eax
; X64-AVX-NEXT:    subl %eax, %ecx
; X64-AVX-NEXT:    shlq $27, %rsi
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX-NEXT:    shldq %cl, %rsi, %rdi
; X64-AVX-NEXT:    movq %rdi, %rax
; X64-AVX-NEXT:    retq
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $67, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is 'shld' with constant operand.

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshl_i32_const_shift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_const_shift:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl $9, %esi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount: 41 % 32 == 9, so this must emit the same
; shld as the in-range case above.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshl_i32_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_const_overshift:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl $9, %esi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

; 105 % 64 == 41, so this is a plain shldq $41 on x86-64; i686 builds the two
; halves with a shldl/shrdl pair.
define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X86-SSE2-LABEL: fshl_i64_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shldl $9, %ecx, %edx
; X86-SSE2-NEXT:    shrdl $23, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i64_const_overshift:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movq %rdi, %rax
; X64-AVX-NEXT:    shldq $41, %rsi, %rax
; X64-AVX-NEXT:    retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshl_i8_const_fold() nounwind {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $-128, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-SSE2-LABEL: fshr_i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
; Same urem-by-37 expansion as fshl_i37; note the extra "addl $27" (64 - 37)
; adjusting the amount after the i37 operands are widened/padded to 64 bits.
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X86-SSE2-LABEL: fshr_i37:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebx
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    andl $31, %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-SSE2-NEXT:    shldl $27, %ebx, %esi
; X86-SSE2-NEXT:    pushl $0
; X86-SSE2-NEXT:    pushl $37
; X86-SSE2-NEXT:    pushl %eax
; X86-SSE2-NEXT:    pushl {{[0-9]+}}(%esp)
; X86-SSE2-NEXT:    calll __umoddi3
; X86-SSE2-NEXT:    addl $16, %esp
; X86-SSE2-NEXT:    movl %eax, %ecx
; X86-SSE2-NEXT:    addl $27, %ecx
; X86-SSE2-NEXT:    testb $32, %cl
; X86-SSE2-NEXT:    je .LBB10_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    movl %edi, %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-SSE2-NEXT:    jmp .LBB10_3
; X86-SSE2-NEXT:  .LBB10_1:
; X86-SSE2-NEXT:    shll $27, %ebx
; X86-SSE2-NEXT:    movl %esi, %edx
; X86-SSE2-NEXT:    movl %ebx, %esi
; X86-SSE2-NEXT:  .LBB10_3:
; X86-SSE2-NEXT:    shrdl %cl, %edx, %esi
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shrdl %cl, %edi, %edx
; X86-SSE2-NEXT:    movl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    popl %ebx
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i37:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movq %rdx, %rcx
; X64-AVX-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
; X64-AVX-NEXT:    andq %rdx, %rax
; X64-AVX-NEXT:    movabsq $498560650640798693, %rdx # imm = 0x6EB3E45306EB3E5
; X64-AVX-NEXT:    mulq %rdx
; X64-AVX-NEXT:    leal (%rdx,%rdx,8), %eax
; X64-AVX-NEXT:    leal (%rdx,%rax,4), %eax
; X64-AVX-NEXT:    subl %eax, %ecx
; X64-AVX-NEXT:    addl $27, %ecx
; X64-AVX-NEXT:    shlq $27, %rsi
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $rcx
; X64-AVX-NEXT:    shrdq %cl, %rdi, %rsi
; X64-AVX-NEXT:    movq %rsi, %rax
; X64-AVX-NEXT:    retq
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() nounwind {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $31, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; demanded bits tests
; The 'or' operands below must not stop the funnel shift from being formed.

define i32 @fshl_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_demandedbits:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_demandedbits:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl $9, %esi, %eax
; X64-AVX-NEXT:    retq
  %x = or i32 %a0, 2147483648
  %y = or i32 %a1, 1
  %res = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %res
}

define i32 @fshr_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_demandedbits:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_demandedbits:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl $23, %esi, %eax
; X64-AVX-NEXT:    retq
  %x = or i32 %a0, 2147483648
  %y = or i32 %a1, 1
  %res = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %res
}

; undef handling

define i32 @fshl_i32_undef0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef0:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %a1)
  ret i32 %res
}

define i32 @fshl_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andl $7, %ecx
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef0_msk:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    andl $7, %ecx
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %m)
  ret i32 %res
}

; With a constant amount and an undef high operand, fshl folds to a plain
; logical shift right of the surviving operand.
define i32 @fshl_i32_undef0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef0_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shrl $23, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
  ret i32 %res
}

define <4 x i32> @fshl_v4i32_undef0_cst(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: fshl_v4i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $20, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psrld $21, %xmm2
; X86-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $22, %xmm1
; X86-SSE2-NEXT:    psrld $23, %xmm0
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshl_v4i32_undef0_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
;
; X64-VBMI2-LABEL: fshl_v4i32_undef0_cst:
; X64-VBMI2:       # %bb.0:
; X64-VBMI2-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-VBMI2-NEXT:    retq
  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> undef, <4 x i32> %a0, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
  ret <4 x i32> %res
}

define i32 @fshl_i32_undef1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %eax, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %a1)
  ret i32 %res
}

define i32 @fshl_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andb $7, %cl
; X86-SSE2-NEXT:    shll %cl, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef1_msk:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    andb $7, %cl
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shll %cl, %eax
; X64-AVX-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
  ret i32 %res
}

define i32 @fshl_i32_undef1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef1_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shll $9, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
  ret i32 %res
}

define <4 x i32> @fshl_v4i32_undef1_cst(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: fshl_v4i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_v4i32_undef1_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
  ret <4 x i32> %res
}

define i32 @fshl_i32_undef2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_undef2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_undef2:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl %cl, %esi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 undef)
  ret i32 %res
}

define i32 @fshr_i32_undef0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef0:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %eax, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %a1)
  ret i32 %res
}

define i32 @fshr_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andb $7, %cl
; X86-SSE2-NEXT:    shrl %cl, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef0_msk:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    andb $7, %cl
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrl %cl, %eax
; X64-AVX-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
  ret i32 %res
}

define i32 @fshr_i32_undef0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef0_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shrl $9, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
  ret i32 %res
}

define <4 x i32> @fshr_v4i32_undef0_cst(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: fshr_v4i32_undef0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $12, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psrld $11, %xmm2
; X86-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrld $10, %xmm1
; X86-SSE2-NEXT:    psrld $9, %xmm0
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_v4i32_undef0_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> undef, <4 x i32> %a0, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
  ret <4 x i32> %res
}

define i32 @fshr_i32_undef1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %a1)
  ret i32 %res
}

define i32 @fshr_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1_msk:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    andl $7, %ecx
; X86-SSE2-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef1_msk:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    andl $7, %ecx
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %m = and i32 %a1, 7
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %m)
  ret i32 %res
}

; With a constant amount and an undef low operand, fshr folds to a plain
; shift left of the surviving operand (by 32 - 9 = 23).
define i32 @fshr_i32_undef1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef1_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shll $23, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
  ret i32 %res
}

define <4 x i32> @fshr_v4i32_undef1_cst(<4 x i32> %a0) nounwind {
; X86-SSE2-LABEL: fshr_v4i32_undef1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    retl
;
; X64-AVX2-LABEL: fshr_v4i32_undef1_cst:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
;
; X64-VBMI2-LABEL: fshr_v4i32_undef1_cst:
; X64-VBMI2:       # %bb.0:
; X64-VBMI2-NEXT:    vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-VBMI2-NEXT:    retq
  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
  ret <4 x i32> %res
}

define i32 @fshr_i32_undef2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_undef2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_undef2:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 undef)
  ret i32 %res
}

; shift zero args

define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    xorl %eax, %eax
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_zero0:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    xorl %eax, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 %a1)
  ret i32 %res
}

define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_zero0_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shrl $23, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
  ret i32 %res
}

define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    xorl %edx, %edx
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_zero1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    xorl %edx, %edx
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %edx, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 %a1)
  ret i32 %res
}

define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_zero1_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shll $9, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
  ret i32 %res
}

define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero0:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    xorl %edx, %edx
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_zero0:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    xorl %edx, %edx
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %edx, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 %a1)
  ret i32 %res
}

define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero0_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrl $9, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_zero0_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shrl $9, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
  ret i32 %res
}

define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero1:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    xorl %eax, %eax
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_zero1:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %ecx
; X64-AVX-NEXT:    xorl %eax, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 %a1)
  ret i32 %res
}

define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero1_cst:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shll $23, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_zero1_cst:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shll $23, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
  ret i32 %res
}

; shift by zero
; A zero amount must fold to just returning the appropriate operand.

define i32 @fshl_i32_zero2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshl_i32_zero2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_zero2:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 0)
  ret i32 %res
}

define i32 @fshr_i32_zero2(i32 %a0, i32 %a1) nounwind {
; X86-SSE2-LABEL: fshr_i32_zero2:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_zero2:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    retq
  %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 0)
  ret i32 %res
}

; With constant shift amount, this is 'shrd' or 'shld'.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshr_i32_const_shift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_const_shift:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl $23, %esi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on the shift amount: 41-32=9, but the right-shift may
; become a left-shift (shld), so the amount is 32-9=23.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshr_i32_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_const_overshift:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    shldl $23, %esi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work: 105-64=41, but the right-shift becomes a left-shift
; (shld), so the amount is 64-41=23.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X86-SSE2-LABEL: fshr_i64_const_overshift:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
; X86-SSE2-NEXT:    shldl $23, %ecx, %edx
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i64_const_overshift:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movq %rdi, %rax
; X64-AVX-NEXT:    shldq $23, %rsi, %rax
; X64-AVX-NEXT:    retq
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.

define i8 @fshr_i8_const_fold() nounwind {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb $-2, %al
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshl_i32_shift_by_bitwidth:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshl_i32_shift_by_bitwidth:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
; X86-SSE2-LABEL: fshr_i32_shift_by_bitwidth:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_i32_shift_by_bitwidth:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
; X86-SSE2-LABEL: fshr_v4i32_shift_by_bitwidth:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movaps %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: fshr_v4i32_shift_by_bitwidth:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps %xmm1, %xmm0
; X64-AVX-NEXT:    retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

; PR45265: an i88 load + ashr 40 + trunc compared against the index; the wide
; shift is lowered with shrd (funnel-shift) instructions.
%struct.S = type { [11 x i8], i8 }
define void @PR45265(i32 %0, ptr nocapture readonly %1) nounwind {
; X86-SSE2-LABEL: PR45265:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %edi
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    leal (%eax,%eax,2), %esi
; X86-SSE2-NEXT:    movzwl 8(%ecx,%esi,4), %edx
; X86-SSE2-NEXT:    movl 4(%ecx,%esi,4), %edi
; X86-SSE2-NEXT:    shrdl $8, %edx, %edi
; X86-SSE2-NEXT:    xorl %eax, %edi
; X86-SSE2-NEXT:    sarl $31, %eax
; X86-SSE2-NEXT:    movzbl 10(%ecx,%esi,4), %ecx
; X86-SSE2-NEXT:    shll $16, %ecx
; X86-SSE2-NEXT:    orl %edx, %ecx
; X86-SSE2-NEXT:    shll $8, %ecx
; X86-SSE2-NEXT:    movl %ecx, %edx
; X86-SSE2-NEXT:    sarl $8, %edx
; X86-SSE2-NEXT:    sarl $31, %ecx
; X86-SSE2-NEXT:    shldl $24, %edx, %ecx
; X86-SSE2-NEXT:    xorl %eax, %ecx
; X86-SSE2-NEXT:    orl %ecx, %edi
; X86-SSE2-NEXT:    jne .LBB50_1
; X86-SSE2-NEXT:  # %bb.2:
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    jmp _Z3foov # TAILCALL
; X86-SSE2-NEXT:  .LBB50_1:
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    popl %edi
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: PR45265:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movslq %edi, %rax
; X64-AVX-NEXT:    leaq (%rax,%rax,2), %rcx
; X64-AVX-NEXT:    movsbq 10(%rsi,%rcx,4), %rdx
; X64-AVX-NEXT:    shlq $16, %rdx
; X64-AVX-NEXT:    movzwl 8(%rsi,%rcx,4), %edi
; X64-AVX-NEXT:    orq %rdx, %rdi
; X64-AVX-NEXT:    movq (%rsi,%rcx,4), %rcx
; X64-AVX-NEXT:    shrdq $40, %rdi, %rcx
; X64-AVX-NEXT:    cmpq %rax, %rcx
; X64-AVX-NEXT:    je _Z3foov # TAILCALL
; X64-AVX-NEXT:  # %bb.1:
; X64-AVX-NEXT:    retq
  %3 = sext i32 %0 to i64
  %4 = getelementptr inbounds %struct.S, ptr %1, i64 %3
  %5 = bitcast ptr %4 to ptr
  %6 = load i88, ptr %5, align 1
  %7 = ashr i88 %6, 40
  %8 = trunc i88 %7 to i64
  %9 = icmp eq i64 %8, %3
  br i1 %9, label %10, label %11

10:
  tail call void @_Z3foov()
  br label %11

11:
  ret void
}
declare dso_local void @_Z3foov()

; 'or' of a plain shift with a funnel shift / rotate that share the same
; shift amount, in both operand orders.

define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_fshl:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shll %cl, %esi
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: or_shl_fshl:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    shll %cl, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %esi, %edi
; X64-AVX-NEXT:    orl %edi, %eax
; X64-AVX-NEXT:    retq
  %shy = shl i32 %y, %s
  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
  %or = or i32 %fun, %shy
  ret i32 %or
}

define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_rotl:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shll %cl, %edx
; X86-SSE2-NEXT:    roll %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: or_shl_rotl:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    shll %cl, %edi
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    roll %cl, %eax
; X64-AVX-NEXT:    orl %edi, %eax
; X64-AVX-NEXT:    retq
  %shx = shl i32 %x, %s
  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %rot, %shx
  ret i32 %or
}

define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_fshl_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shll %cl, %esi
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: or_shl_fshl_commute:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    shll %cl, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %esi, %edi
; X64-AVX-NEXT:    orl %edi, %eax
; X64-AVX-NEXT:    retq
  %shy = shl i32 %y, %s
  %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
  %or = or i32 %shy, %fun
  ret i32 %or
}

define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_rotl_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shll %cl, %edx
; X86-SSE2-NEXT:    roll %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: or_shl_rotl_commute:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    shll %cl, %edi
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    roll %cl, %eax
; X64-AVX-NEXT:    orl %edi, %eax
; X64-AVX-NEXT:    retq
  %shx = shl i32 %x, %s
  %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %shx, %rot
  ret i32 %or
}

define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_fshr:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shrl %cl, %esi
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: or_lshr_fshr:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    shrl %cl, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %esi, %edi
; X64-AVX-NEXT:    orl %edi, %eax
; X64-AVX-NEXT:    retq
  %shy = lshr i32 %y, %s
  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
  %or = or i32 %fun, %shy
  ret i32 %or
}

define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_rotr:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shrl %cl, %edx
; X86-SSE2-NEXT:    rorl %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: or_lshr_rotr:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    shrl %cl, %edi
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    rorl %cl, %eax
; X64-AVX-NEXT:    orl %edi, %eax
; X64-AVX-NEXT:    retq
  %shx = lshr i32 %x, %s
  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %rot, %shx
  ret i32 %or
}

define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_fshr_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %esi
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl %edx, %esi
; X86-SSE2-NEXT:    shrl %cl, %esi
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    orl %esi, %eax
; X86-SSE2-NEXT:    popl %esi
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: or_lshr_fshr_commute:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    shrl %cl, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %esi, %edi
; X64-AVX-NEXT:    orl %edi, %eax
; X64-AVX-NEXT:    retq
  %shy = lshr i32 %y, %s
  %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
  %or = or i32 %shy, %fun
  ret i32 %or
}

define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_rotr_commute:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    shrl %cl, %edx
; X86-SSE2-NEXT:    rorl %cl, %eax
; X86-SSE2-NEXT:    orl %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: or_lshr_rotr_commute:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    shrl %cl, %edi
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    rorl %cl, %eax
; X64-AVX-NEXT:    orl %edi, %eax
; X64-AVX-NEXT:    retq
  %shx = lshr i32 %x, %s
  %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
  %or = or i32 %shx, %rot
  ret i32 %or
}

; In the *_simplify variants the 'or' folds away entirely and a single
; shld/shrd remains.

define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_shl_fshl_simplify:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: or_shl_fshl_simplify:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shldl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %shy = shl i32 %y, %s
  %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
  %or = or i32 %fun, %shy
  ret i32 %or
}

define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind {
; X86-SSE2-LABEL: or_lshr_fshr_simplify:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
; X86-SSE2-NEXT:    retl
;
; X64-AVX-LABEL: or_lshr_fshr_simplify:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movl %edx, %ecx
; X64-AVX-NEXT:    movl %esi, %eax
; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
; X64-AVX-NEXT:    retq
  %shy = lshr i32 %y, %s
  %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
  %or = or i32 %shy, %fun
  ret i32 %or
}