; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86,NOBMI-X86
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefixes=X86,BMI-X86
; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64,NOBMI-X64
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefixes=X64,BMI-X64

; Fold
;   ptr - (ptr & (alignment-1))
; To
;   ptr & (0 - alignment)
;
; This needs to be a backend-level fold because only at that point are
; pointers just plain registers; in middle-end IR this can only be expressed
; via the @llvm.ptrmask() intrinsic, which is not yet sufficiently widely used.
; (A worked example and an illustrative sketch of the folded form are appended
; at the end of this file.)
;
; https://bugs.llvm.org/show_bug.cgi?id=44448

; The basic positive tests

define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
; X86-LABEL: t0_32:
; X86:       # %bb.0:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t0_32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    negl %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %ptr, %mask
  %r = sub i32 %ptr, %bias
  ret i32 %r
}
define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
; X86-LABEL: t1_64:
; X86:       # %bb.0:
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t1_64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    negq %rax
; X64-NEXT:    andq %rdi, %rax
; X64-NEXT:    retq
  %mask = add i64 %alignment, -1
  %bias = and i64 %ptr, %mask
  %r = sub i64 %ptr, %bias
  ret i64 %r
}

define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
; X86-LABEL: t2_commutative:
; X86:       # %bb.0:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t2_commutative:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    negl %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %mask, %ptr ; swapped
  %r = sub i32 %ptr, %bias
  ret i32 %r
}

; Extra use tests

define i32 @t3_extrause0(i32 %ptr, i32 %alignment, ptr %mask_storage) nounwind {
; X86-LABEL: t3_extrause0:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    leal -1(%eax), %edx
; X86-NEXT:    movl %edx, (%ecx)
; X86-NEXT:    negl %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t3_extrause0:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    leal -1(%rax), %ecx
; X64-NEXT:    movl %ecx, (%rdx)
; X64-NEXT:    negl %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    # kill: def $eax killed $eax killed $rax
; X64-NEXT:    retq
  %mask = add i32 %alignment, -1
  store i32 %mask, ptr %mask_storage
  %bias = and i32 %ptr, %mask
  %r = sub i32 %ptr, %bias
  ret i32 %r
}
define i32 @n4_extrause1(i32 %ptr, i32 %alignment, ptr %bias_storage) nounwind {
; X86-LABEL: n4_extrause1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    decl %edx
; X86-NEXT:    andl %eax, %edx
; X86-NEXT:    movl %edx, (%ecx)
; X86-NEXT:    subl %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: n4_extrause1:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    decl %esi
; X64-NEXT:    andl %edi, %esi
; X64-NEXT:    movl %esi, (%rdx)
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
  store i32 %bias, ptr %bias_storage
  %r = sub i32 %ptr, %bias
  ret i32 %r
}
define i32 @n5_extrause2(i32 %ptr, i32 %alignment, ptr %mask_storage, ptr %bias_storage) nounwind {
; X86-LABEL: n5_extrause2:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    decl %esi
; X86-NEXT:    movl %esi, (%edx)
; X86-NEXT:    andl %eax, %esi
; X86-NEXT:    movl %esi, (%ecx)
; X86-NEXT:    subl %esi, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: n5_extrause2:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    decl %esi
; X64-NEXT:    movl %esi, (%rdx)
; X64-NEXT:    andl %edi, %esi
; X64-NEXT:    movl %esi, (%rcx)
; X64-NEXT:    subl %esi, %eax
; X64-NEXT:    retq
  %mask = add i32 %alignment, -1
  store i32 %mask, ptr %mask_storage
  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
  store i32 %bias, ptr %bias_storage
  %r = sub i32 %ptr, %bias
  ret i32 %r
}

; Negative tests

define i32 @n6_different_ptrs(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
; X86-LABEL: n6_different_ptrs:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    decl %ecx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: n6_different_ptrs:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    decl %edx
; X64-NEXT:    andl %esi, %edx
; X64-NEXT:    subl %edx, %eax
; X64-NEXT:    retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %ptr1, %mask ; not %ptr0
  %r = sub i32 %ptr0, %bias ; not %ptr1
  ret i32 %r
}
define i32 @n7_different_ptrs_commutative(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
; X86-LABEL: n7_different_ptrs_commutative:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    decl %ecx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: n7_different_ptrs_commutative:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    decl %edx
; X64-NEXT:    andl %esi, %edx
; X64-NEXT:    subl %edx, %eax
; X64-NEXT:    retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %mask, %ptr1 ; swapped, not %ptr0
  %r = sub i32 %ptr0, %bias ; not %ptr1
  ret i32 %r
}

define i32 @n8_not_lowbit_mask(i32 %ptr, i32 %alignment) nounwind {
; NOBMI-X86-LABEL: n8_not_lowbit_mask:
; NOBMI-X86:       # %bb.0:
; NOBMI-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; NOBMI-X86-NEXT:    incl %eax
; NOBMI-X86-NEXT:    notl %eax
; NOBMI-X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; NOBMI-X86-NEXT:    retl
;
; BMI-X86-LABEL: n8_not_lowbit_mask:
; BMI-X86:       # %bb.0:
; BMI-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; BMI-X86-NEXT:    incl %eax
; BMI-X86-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
; BMI-X86-NEXT:    retl
;
; NOBMI-X64-LABEL: n8_not_lowbit_mask:
; NOBMI-X64:       # %bb.0:
; NOBMI-X64-NEXT:    # kill: def $esi killed $esi def $rsi
; NOBMI-X64-NEXT:    leal 1(%rsi), %eax
; NOBMI-X64-NEXT:    notl %eax
; NOBMI-X64-NEXT:    andl %edi, %eax
; NOBMI-X64-NEXT:    retq
;
; BMI-X64-LABEL: n8_not_lowbit_mask:
; BMI-X64:       # %bb.0:
; BMI-X64-NEXT:    incl %esi
; BMI-X64-NEXT:    andnl %edi, %esi, %eax
; BMI-X64-NEXT:    retq
  %mask = add i32 %alignment, 1 ; not -1
  %bias = and i32 %ptr, %mask
  %r = sub i32 %ptr, %bias
  ret i32 %r
}

define i32 @n9_sub_is_not_commutative(i32 %ptr, i32 %alignment) nounwind {
; X86-LABEL: n9_sub_is_not_commutative:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    decl %eax
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    subl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: n9_sub_is_not_commutative:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $esi killed $esi def $rsi
; X64-NEXT:    leal -1(%rsi), %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    subl %edi, %eax
; X64-NEXT:    retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %ptr, %mask
  %r = sub i32 %bias, %ptr ; wrong order
  ret i32 %r
}
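
; A worked example and an illustrative sketch (hand-written, not autogenerated,
; and not covered by any CHECK lines above; the function name and the concrete
; alignment used in this comment are hypothetical). With alignment = 16:
; mask = 15 and bias = ptr & 0xf; since the set bits of bias are a subset of
; the set bits of ptr, ptr - bias == ptr & ~0xf == ptr & -16 == ptr & (0 - 16).
; The folded form written out directly in IR:
define i32 @illustration_align_down_folded(i32 %ptr, i32 %alignment) nounwind {
  %negated = sub i32 0, %alignment ; 0 - alignment == ~(alignment - 1)
  %r = and i32 %ptr, %negated      ; a single neg + and replaces add + and + sub
  ret i32 %r
}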