; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK

; fold (sra (load i32), 16) -> (sextload i16)
; The expected output is a single sign-extending 16-bit load from byte
; offset 2, with no separate shift instruction.
define i32 @sra_half(ptr %p) {
; CHECK-LABEL: sra_half:
; CHECK: # %bb.0:
; CHECK-NEXT: movswl 2(%rdi), %eax
; CHECK-NEXT: retq
  %wide = load i32, ptr %p
  %hi = ashr i32 %wide, 16
  ret i32 %hi
}

; The vector form is not folded: the full vector load and the arithmetic
; shift are both expected to remain.
define <4 x i32> @sra_half_vec(ptr %p) {
; CHECK-LABEL: sra_half_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa (%rdi), %xmm0
; CHECK-NEXT: psrad $16, %xmm0
; CHECK-NEXT: retq
  %vec = load <4 x i32>, ptr %p
  %vec.hi = ashr <4 x i32> %vec, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %vec.hi
}

; fold (sra (load i64), 48) -> (sextload i16)
; The expected output is a sign-extending 16-bit load from byte offset 6.
define i64 @sra_large_shift(ptr %r) {
; CHECK-LABEL: sra_large_shift:
; CHECK: # %bb.0:
; CHECK-NEXT: movswq 6(%rdi), %rax
; CHECK-NEXT: retq
  %quad = load i64, ptr %r
  %top = ashr i64 %quad, 48
  ret i64 %top
}

; Negative test: with a shift amount of 8 no fold is expected, so the full
; i32 load followed by an explicit sarl must remain.
define i32 @sra_small_shift(ptr %p) {
; CHECK-LABEL: sra_small_shift:
; CHECK: # %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: sarl $8, %eax
; CHECK-NEXT: retq
  %wide = load i32, ptr %p
  %shr = ashr i32 %wide, 8
  ret i32 %shr
}

; This should be folded to a zextload.
; Expected output: one zero-extending byte load from offset 1, no shift.
define i32 @sra_of_zextload(ptr %p) {
; CHECK-LABEL: sra_of_zextload:
; CHECK: # %bb.0:
; CHECK-NEXT: movzbl 1(%rdi), %eax
; CHECK-NEXT: retq
  %narrow = load i16, ptr %p
  %widened = zext i16 %narrow to i32
  %res = ashr i32 %widened, 8
  ret i32 %res
}

; fold (sra (sextload i16 to i32), 8) -> (sextload i8)
; Expected output: one sign-extending byte load from offset 1, no shift.
define i32 @sra_of_sextload(ptr %p) {
; CHECK-LABEL: sra_of_sextload:
; CHECK: # %bb.0:
; CHECK-NEXT: movsbl 1(%rdi), %eax
; CHECK-NEXT: retq
  %narrow = load i16, ptr %p
  %widened = sext i16 %narrow to i32
  %res = ashr i32 %widened, 8
  ret i32 %res
}

; Negative test. The shift amount (16) equals the width of the memory type
; (i16), so the result is taken only from the extended sign bits above the
; loaded i16. The load cannot be narrowed and the shift is expected to remain.
define i32 @sra_of_sextload_no_fold(ptr %p) {
; CHECK-LABEL: sra_of_sextload_no_fold:
; CHECK: # %bb.0:
; CHECK-NEXT: movswl (%rdi), %eax
; CHECK-NEXT: sarl $16, %eax
; CHECK-NEXT: retq
  %narrow = load i16, ptr %p
  %widened = sext i16 %narrow to i32
  %res = ashr i32 %widened, 16
  ret i32 %res
}

; The fold must still happen when the sra result has more than one user:
; here it feeds both the xor and the mul.
define i32 @sra_to_sextload_multiple_sra_uses(ptr %p) {
; CHECK-LABEL: sra_to_sextload_multiple_sra_uses:
; CHECK: # %bb.0:
; CHECK-NEXT: movswl 2(%rdi), %ecx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: xorl $6, %eax
; CHECK-NEXT: imull %ecx, %eax
; CHECK-NEXT: retq
  %wide = load i32, ptr %p
  %hi = ashr i32 %wide, 16
  %flipped = xor i32 %hi, 6
  %product = mul i32 %hi, %flipped
  ret i32 %product
}