; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64

define i32 @t1(ptr %X, i32 %i) {
; X86-LABEL: t1:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl %cl, %ecx
; X86-NEXT:    movl (%eax,%ecx,4), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movzbl %sil, %eax
; X64-NEXT:    movl (%rdi,%rax,4), %eax
; X64-NEXT:    retq
entry:
  %tmp2 = shl i32 %i, 2
  %tmp4 = and i32 %tmp2, 1020
  %tmp7 = getelementptr i8, ptr %X, i32 %tmp4
  %tmp9 = load i32, ptr %tmp7
  ret i32 %tmp9
}

define i32 @t2(ptr %X, i32 %i) {
; X86-LABEL: t2:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzwl %cx, %ecx
; X86-NEXT:    movl (%eax,%ecx,4), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    movl (%rdi,%rax,4), %eax
; X64-NEXT:    retq
entry:
  %tmp2 = shl i32 %i, 1
  %tmp4 = and i32 %tmp2, 131070
  %tmp7 = getelementptr i16, ptr %X, i32 %tmp4
  %tmp9 = load i32, ptr %tmp7
  ret i32 %tmp9
}

; This case is tricky. The lshr followed by a gep will produce a lshr followed
; by an and to remove the low bits. This can be simplified by doing the lshr by
; a greater constant and using the addressing mode to scale the result back up.
; To make matters worse, because of the two-phase zext of %i and their reuse in
; the function, the DAG can get confused trying to re-use both of them, which
; prevents easy analysis of the mask needed to match this.
define i32 @t3(ptr %i.ptr, ptr %arr) {
; X86-LABEL: t3:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    shrl $11, %edx
; X86-NEXT:    addl (%ecx,%edx,4), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t3:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    shrl $11, %ecx
; X64-NEXT:    addl (%rsi,%rcx,4), %eax
; X64-NEXT:    retq
entry:
  %i = load i16, ptr %i.ptr
  %i.zext = zext i16 %i to i32
  %index = lshr i32 %i.zext, 11
  %val.ptr = getelementptr inbounds i32, ptr %arr, i32 %index
  %val = load i32, ptr %val.ptr
  %sum = add i32 %val, %i.zext
  ret i32 %sum
}

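; A hand-written sketch of the pre-folded shape that the comment before @t3
; describes, for illustration only: the function name below is made up and no
; FileCheck lines cover it. Here the byte offset is written as an explicit
; shift-then-mask, (%i.zext >> 9) & -4, which equals (%i.zext >> 11) * 4 --
; the form @t3 expresses instead through the scaled addressing mode.
define i32 @t3_explicit_mask(ptr %i.ptr, ptr %arr) {
entry:
  %i = load i16, ptr %i.ptr
  %i.zext = zext i16 %i to i32
  ; Shift by the smaller constant, then mask away the low bits ...
  %shifted = lshr i32 %i.zext, 9
  %offset = and i32 %shifted, -4
  ; ... and add the result as a raw byte offset rather than a scaled i32 index.
  %val.ptr = getelementptr inbounds i8, ptr %arr, i32 %offset
  %val = load i32, ptr %val.ptr
  %sum = add i32 %val, %i.zext
  ret i32 %sum
}
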
; A version of @t3 that has more zero extends and more re-use of intermediate
; values. This exercises slightly different bits of canonicalization.
define i32 @t4(ptr %i.ptr, ptr %arr) {
; X86-LABEL: t4:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl (%eax), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    shrl $11, %edx
; X86-NEXT:    addl (%ecx,%edx,4), %eax
; X86-NEXT:    addl %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: t4:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    shrl $11, %ecx
; X64-NEXT:    addl (%rsi,%rcx,4), %eax
; X64-NEXT:    addl %ecx, %eax
; X64-NEXT:    retq
entry:
  %i = load i16, ptr %i.ptr
  %i.zext = zext i16 %i to i32
  %index = lshr i32 %i.zext, 11
  %index.zext = zext i32 %index to i64
  %val.ptr = getelementptr inbounds i32, ptr %arr, i64 %index.zext
  %val = load i32, ptr %val.ptr
  %sum.1 = add i32 %val, %i.zext
  %sum.2 = add i32 %sum.1, %index
  ret i32 %sum.2
}

define i8 @t5(ptr %X, i32 %i) {
; X86-LABEL: t5:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $-14, %ecx
; X86-NEXT:    movzbl (%eax,%ecx,4), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t5:
; X64:       # %bb.0: # %entry
; X64-NEXT:    shll $2, %esi
; X64-NEXT:    andl $-56, %esi
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    movzbl (%rdi,%rax), %eax
; X64-NEXT:    retq
entry:
  %tmp2 = shl i32 %i, 2
  %tmp4 = and i32 %tmp2, -56
  %tmp7 = getelementptr i8, ptr %X, i32 %tmp4
  %tmp9 = load i8, ptr %tmp7
  ret i8 %tmp9
}

define i8 @t6(ptr %X, i32 %i) {
; X86-LABEL: t6:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $-255, %ecx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl (%eax,%ecx,4), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t6:
; X64:       # %bb.0: # %entry
; X64-NEXT:    shll $2, %esi
; X64-NEXT:    andl $-1020, %esi # imm = 0xFC04
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    movzbl (%rdi,%rax), %eax
; X64-NEXT:    retq
entry:
  %tmp2 = shl i32 %i, 2
  %tmp4 = and i32 %tmp2, -1020
  %tmp7 = getelementptr i8, ptr %X, i32 %tmp4
  %tmp9 = load i8, ptr %tmp7
  ret i8 %tmp9
}