; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64

; Check that we recognize this idiom for rotation too:
;   a << (b & (OpSize-1)) | a >> ((0 - b) & (OpSize-1))

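; For reference only (not exercised by the CHECK lines below): the same rotate
; can also be written with the funnel-shift intrinsic, which is the form newer
; instcombine tends to canonicalize this idiom to. The function name
; @rotl_i32_fshl_example is made up for this sketch.
define i32 @rotl_i32_fshl_example(i32 %a, i32 %b) {
  ; rotl(a, b) == fshl(a, a, b); the intrinsic already treats the amount modulo 32.
  %or = call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
  ret i32 %or
}
declare i32 @llvm.fshl.i32(i32, i32, i32)
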
define i32 @rotate_left_32(i32 %a, i32 %b) {
; X86-LABEL: rotate_left_32:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %eax
; X64-NEXT: retq
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = lshr i32 %a, %and3
  %or = or i32 %shl, %shr
  ret i32 %or
}

define i32 @rotate_right_32(i32 %a, i32 %b) {
; X86-LABEL: rotate_right_32:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, %eax
; X64-NEXT: retq
  %and = and i32 %b, 31
  %shl = lshr i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = shl i32 %a, %and3
  %or = or i32 %shl, %shr
  ret i32 %or
}

define i64 @rotate_left_64(i64 %a, i64 %b) {
; X86-LABEL: rotate_left_64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: .cfi_offset %esi, -16
; X86-NEXT: .cfi_offset %edi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movl %edi, %edx
; X86-NEXT: shldl %cl, %esi, %edx
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB2_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %eax, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .LBB2_2:
; X86-NEXT: negb %cl
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: shrl %cl, %ebx
; X86-NEXT: shrdl %cl, %edi, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB2_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: .LBB2_4:
; X86-NEXT: orl %ebx, %edx
; X86-NEXT: orl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_64:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rcx
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NEXT: rolq %cl, %rax
; X64-NEXT: retq
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = lshr i64 %a, %and3
  %or = or i64 %shl, %shr
  ret i64 %or
}

define i64 @rotate_right_64(i64 %a, i64 %b) {
; X86-LABEL: rotate_right_64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: .cfi_offset %esi, -16
; X86-NEXT: .cfi_offset %edi, -12
; X86-NEXT: .cfi_offset %ebx, -8
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: shrl %cl, %edx
; X86-NEXT: movl %edi, %eax
; X86-NEXT: shrdl %cl, %esi, %eax
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB3_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %edx, %eax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .LBB3_2:
; X86-NEXT: negb %cl
; X86-NEXT: movl %edi, %ebx
; X86-NEXT: shll %cl, %ebx
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB3_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebx, %esi
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: .LBB3_4:
; X86-NEXT: orl %esi, %edx
; X86-NEXT: orl %ebx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_64:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rcx
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NEXT: rorq %cl, %rax
; X64-NEXT: retq
  %and = and i64 %b, 63
  %shl = lshr i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = shl i64 %a, %and3
  %or = or i64 %shl, %shr
  ret i64 %or
}

; Also check mem operand.
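; The load/rotate/store sequence should fold into a single memory-destination
; rotate (e.g. "roll %cl, (%eax)" / "rolq %cl, (%rdi)"), as the CHECK lines below expect.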

define void @rotate_left_m32(ptr%pa, i32 %b) {
; X86-LABEL: rotate_left_m32:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_m32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, (%rdi)
; X64-NEXT: retq
  %a = load i32, ptr %pa, align 16
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = lshr i32 %a, %and3
  %or = or i32 %shl, %shr
  store i32 %or, ptr %pa, align 32
  ret void
}

define void @rotate_right_m32(ptr%pa, i32 %b) {
; X86-LABEL: rotate_right_m32:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorl %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_m32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorl %cl, (%rdi)
; X64-NEXT: retq
  %a = load i32, ptr %pa, align 16
  %and = and i32 %b, 31
  %shl = lshr i32 %a, %and
  %t0 = sub i32 0, %b
  %and3 = and i32 %t0, 31
  %shr = shl i32 %a, %and3
  %or = or i32 %shl, %shr
  store i32 %or, ptr %pa, align 32
  ret void
}

define void @rotate_left_m64(ptr%pa, i64 %b) {
; X86-LABEL: rotate_left_m64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %esi
; X86-NEXT: movl 4(%eax), %ebx
; X86-NEXT: movl %esi, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB6_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %edx, %edi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .LBB6_2:
; X86-NEXT: negb %cl
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: shrl %cl, %ebp
; X86-NEXT: shrdl %cl, %ebx, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB6_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebp, %esi
; X86-NEXT: xorl %ebp, %ebp
; X86-NEXT: .LBB6_4:
; X86-NEXT: orl %esi, %edx
; X86-NEXT: orl %ebp, %edi
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: movl %edi, 4(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_m64:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rcx
; X64-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NEXT: rolq %cl, (%rdi)
; X64-NEXT: retq
  %a = load i64, ptr %pa, align 16
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = lshr i64 %a, %and3
  %or = or i64 %shl, %shr
  store i64 %or, ptr %pa, align 64
  ret void
}

define void @rotate_right_m64(ptr%pa, i64 %b) {
; X86-LABEL: rotate_right_m64:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: pushl %ebx
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: pushl %edi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 20
; X86-NEXT: .cfi_offset %esi, -20
; X86-NEXT: .cfi_offset %edi, -16
; X86-NEXT: .cfi_offset %ebx, -12
; X86-NEXT: .cfi_offset %ebp, -8
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %ebx
; X86-NEXT: movl 4(%eax), %esi
; X86-NEXT: movl %esi, %edx
; X86-NEXT: shrl %cl, %edx
; X86-NEXT: movl %ebx, %edi
; X86-NEXT: shrdl %cl, %esi, %edi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB7_2
; X86-NEXT: # %bb.1:
; X86-NEXT: movl %edx, %edi
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: .LBB7_2:
; X86-NEXT: negb %cl
; X86-NEXT: movl %ebx, %ebp
; X86-NEXT: shll %cl, %ebp
; X86-NEXT: shldl %cl, %ebx, %esi
; X86-NEXT: testb $32, %cl
; X86-NEXT: je .LBB7_4
; X86-NEXT: # %bb.3:
; X86-NEXT: movl %ebp, %esi
; X86-NEXT: xorl %ebp, %ebp
; X86-NEXT: .LBB7_4:
; X86-NEXT: orl %ebp, %edi
; X86-NEXT: orl %esi, %edx
; X86-NEXT: movl %edi, (%eax)
; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 16
; X86-NEXT: popl %edi
; X86-NEXT: .cfi_def_cfa_offset 12
; X86-NEXT: popl %ebx
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_m64:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rcx
; X64-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-NEXT: rorq %cl, (%rdi)
; X64-NEXT: retq
  %a = load i64, ptr %pa, align 16
  %and = and i64 %b, 63
  %shl = lshr i64 %a, %and
  %t0 = sub i64 0, %b
  %and3 = and i64 %t0, 63
  %shr = shl i64 %a, %and3
  %or = or i64 %shl, %shr
  store i64 %or, ptr %pa, align 64
  ret void
}

; The next 8 tests include masks of the narrow width shift amounts that should be eliminated.
; These patterns are produced by instcombine after r310509.
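; In C-like form, the i8 left-rotate case of this masked idiom is:
;   x << (n & 7) | x >> (-n & 7)
; The "& 7" masks only keep the IR shifts well defined; the hardware rotate
; already uses the amount modulo the bit width, so the masks should not block
; forming rolb/rorb (and likewise rolw/rorw for i16).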

define i8 @rotate_left_8(i8 %x, i32 %amount) {
; X86-LABEL: rotate_left_8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_8:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rolb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shl = shl i8 %x, %maskamt
  %shr = lshr i8 %x, %masksub
  %or = or i8 %shl, %shr
  ret i8 %or
}

define i8 @rotate_right_8(i8 %x, i32 %amount) {
; X86-LABEL: rotate_right_8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_8:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shr = lshr i8 %x, %maskamt
  %shl = shl i8 %x, %masksub
  %or = or i8 %shr, %shl
  ret i8 %or
}

define i16 @rotate_left_16(i16 %x, i32 %amount) {
; X86-LABEL: rotate_left_16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw %cl, %ax
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_16:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rolw %cl, %ax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shl = shl i16 %x, %maskamt
  %shr = lshr i16 %x, %masksub
  %or = or i16 %shl, %shr
  ret i16 %or
}

define i16 @rotate_right_16(i16 %x, i32 %amount) {
; X86-LABEL: rotate_right_16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorw %cl, %ax
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_16:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorw %cl, %ax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shr = lshr i16 %x, %maskamt
  %shl = shl i16 %x, %masksub
  %or = or i16 %shr, %shl
  ret i16 %or
}

define void @rotate_left_m8(ptr %p, i32 %amount) {
; X86-LABEL: rotate_left_m8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolb %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_m8:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rolb %cl, (%rdi)
; X64-NEXT: retq
  %x = load i8, ptr %p, align 1
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shl = shl i8 %x, %maskamt
  %shr = lshr i8 %x, %masksub
  %or = or i8 %shl, %shr
  store i8 %or, ptr %p, align 1
  ret void
}

define void @rotate_right_m8(ptr %p, i32 %amount) {
; X86-LABEL: rotate_right_m8:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorb %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_m8:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorb %cl, (%rdi)
; X64-NEXT: retq
  %x = load i8, ptr %p, align 1
  %amt = trunc i32 %amount to i8
  %sub = sub i8 0, %amt
  %maskamt = and i8 %amt, 7
  %masksub = and i8 %sub, 7
  %shl = shl i8 %x, %masksub
  %shr = lshr i8 %x, %maskamt
  %or = or i8 %shl, %shr
  store i8 %or, ptr %p, align 1
  ret void
}

define void @rotate_left_m16(ptr %p, i32 %amount) {
; X86-LABEL: rotate_left_m16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rolw %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_left_m16:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rolw %cl, (%rdi)
; X64-NEXT: retq
  %x = load i16, ptr %p, align 1
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shl = shl i16 %x, %maskamt
  %shr = lshr i16 %x, %masksub
  %or = or i16 %shl, %shr
  store i16 %or, ptr %p, align 1
  ret void
}

define void @rotate_right_m16(ptr %p, i32 %amount) {
; X86-LABEL: rotate_right_m16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: rorw %cl, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: rotate_right_m16:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: rorw %cl, (%rdi)
; X64-NEXT: retq
  %x = load i16, ptr %p, align 1
  %amt = trunc i32 %amount to i16
  %sub = sub i16 0, %amt
  %maskamt = and i16 %amt, 15
  %masksub = and i16 %sub, 15
  %shl = shl i16 %x, %masksub
  %shr = lshr i16 %x, %maskamt
  %or = or i16 %shl, %shr
  store i16 %or, ptr %p, align 1
  ret void
}

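; The remaining tests use masks that are narrower than the full shift-amount
; width (30 or 23 rather than 31), or pre-shift the amount; demanded-bits
; reasoning should still let the rotate be matched, so each one is expected
; to select roll with at most an extra mask or doubling of the count.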
define i32 @rotate_demanded_bits(i32, i32) {
; X86-LABEL: rotate_demanded_bits:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $30, %cl
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotate_demanded_bits:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andb $30, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %eax
; X64-NEXT: retq
  %3 = and i32 %1, 30
  %4 = shl i32 %0, %3
  %5 = sub nsw i32 0, %3
  %6 = and i32 %5, 30
  %7 = lshr i32 %0, %6
  %8 = or i32 %7, %4
  ret i32 %8
}

define i32 @rotate_demanded_bits_2(i32, i32) {
; X86-LABEL: rotate_demanded_bits_2:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: andb $23, %cl
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotate_demanded_bits_2:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: andb $23, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %eax
; X64-NEXT: retq
  %3 = and i32 %1, 23
  %4 = shl i32 %0, %3
  %5 = sub nsw i32 0, %3
  %6 = and i32 %5, 31
  %7 = lshr i32 %0, %6
  %8 = or i32 %7, %4
  ret i32 %8
}

define i32 @rotate_demanded_bits_3(i32, i32) {
; X86-LABEL: rotate_demanded_bits_3:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: addb %cl, %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: roll %cl, %eax
; X86-NEXT: retl
;
; X64-LABEL: rotate_demanded_bits_3:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: movl %edi, %eax
; X64-NEXT: leal (%rsi,%rsi), %ecx
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: roll %cl, %eax
; X64-NEXT: retq
  %3 = shl i32 %1, 1
  %4 = and i32 %3, 30
  %5 = shl i32 %0, %4
  %6 = sub i32 0, %3
  %7 = and i32 %6, 30
  %8 = lshr i32 %0, %7
  %9 = or i32 %5, %8
  ret i32 %9
}