; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s

; fold (shl (zext (lshr (A, X))), X) -> (zext (shl (lshr (A, X)), X))

; Canonicalize the sequence shl/zext/lshr performing the zeroextend
; as the last instruction of the sequence.
; This will help DAGCombiner to identify and then fold the sequence
; of shifts into a single AND.
; This transformation is profitable if the shift amounts are the same
; and if there is only one use of the zext.

; Positive cases (fun1 - fun6): a logical shift right by 4, a zero extension
; and a shift left by 4, with a single use of the zext. Each source/result
; width pairing is covered once. The expected code contains an AND and no
; shift between that AND and the return.

; i8 -> i16
define i16 @fun1(i8 zeroext %v) {
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i16
  %shl = shl i16 %ext, 4
  ret i16 %shl
}

; CHECK-LABEL: @fun1
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i8 -> i32
define i32 @fun2(i8 zeroext %v) {
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i32
  %shl = shl i32 %ext, 4
  ret i32 %shl
}

; CHECK-LABEL: @fun2
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i16 -> i32
define i32 @fun3(i16 zeroext %v) {
entry:
  %shr = lshr i16 %v, 4
  %ext = zext i16 %shr to i32
  %shl = shl i32 %ext, 4
  ret i32 %shl
}

; CHECK-LABEL: @fun3
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i8 -> i64
define i64 @fun4(i8 zeroext %v) {
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; CHECK-LABEL: @fun4
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i16 -> i64
define i64 @fun5(i16 zeroext %v) {
entry:
  %shr = lshr i16 %v, 4
  %ext = zext i16 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; CHECK-LABEL: @fun5
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; i32 -> i64
define i64 @fun6(i32 zeroext %v) {
entry:
  %shr = lshr i32 %v, 4
  %ext = zext i32 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; CHECK-LABEL: @fun6
; CHECK: and
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: ret

; Don't fold the pattern if we use arithmetic shifts.
; (fun7 - fun9 replace the lshr with an ashr; both an arithmetic shift right
; and a shift left are expected to remain in the output.)

; i8 -> i64, arithmetic shift
define i64 @fun7(i8 zeroext %v) {
entry:
  %shr = ashr i8 %v, 4
  %ext = zext i8 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; CHECK-LABEL: @fun7
; CHECK: sar
; CHECK: shl
; CHECK: ret

; i16 -> i64, arithmetic shift
define i64 @fun8(i16 zeroext %v) {
entry:
  %shr = ashr i16 %v, 4
  %ext = zext i16 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; CHECK-LABEL: @fun8
; CHECK: sar
; CHECK: shl
; CHECK: ret

; i32 -> i64, arithmetic shift
define i64 @fun9(i32 zeroext %v) {
entry:
  %shr = ashr i32 %v, 4
  %ext = zext i32 %shr to i64
  %shl = shl i64 %ext, 4
  ret i64 %shl
}

; CHECK-LABEL: @fun9
; CHECK: sar
; CHECK: shl
; CHECK: ret

; Don't fold the pattern if there is more than one use of the
; operand in input to the shift left.
; (In fun10 - fun12 the zext result feeds both the shl and the add, so both
; shifts are expected to remain in the output.)

; i8 -> i64, zext has two uses
define i64 @fun10(i8 zeroext %v) {
entry:
  %shr = lshr i8 %v, 4
  %ext = zext i8 %shr to i64
  %shl = shl i64 %ext, 4
  %add = add i64 %shl, %ext
  ret i64 %add
}

; CHECK-LABEL: @fun10
; CHECK: shr
; CHECK: shl
; CHECK: ret

; i16 -> i64, zext has two uses
define i64 @fun11(i16 zeroext %v) {
entry:
  %shr = lshr i16 %v, 4
  %ext = zext i16 %shr to i64
  %shl = shl i64 %ext, 4
  %add = add i64 %shl, %ext
  ret i64 %add
}

; CHECK-LABEL: @fun11
; CHECK: shr
; CHECK: shl
; CHECK: ret

; i32 -> i64, zext has two uses
define i64 @fun12(i32 zeroext %v) {
entry:
  %shr = lshr i32 %v, 4
  %ext = zext i32 %shr to i64
  %shl = shl i64 %ext, 4
  %add = add i64 %shl, %ext
  ret i64 %add
}

; CHECK-LABEL: @fun12
; CHECK: shr
; CHECK: shl
; CHECK: ret

; PR17380
; Make sure that the combined dags are legal if we run the DAGCombiner after
; Legalization took place. The add instruction is redundant and increases by
; one the number of uses of the zext. This prevents the transformation from
; firing before dags are legalized and optimized.
; Once the add is removed, the number of uses becomes one and therefore the
; dags are canonicalized. After Legalization, we need to make sure that the
; valuetype for the shift count is legal.
; Verify also that we correctly fold the shl-shr sequence into an
; AND with bitmask.

define void @g(i32 %a) {
  %b = lshr i32 %a, 2
  %c = zext i32 %b to i64
  ; %d is intentionally unused: it only exists to give %c a second use.
  %d = add i64 %c, 1
  %e = shl i64 %c, 2
  tail call void @f(i64 %e)
  ret void
}

; CHECK-LABEL: @g
; CHECK-NOT: shr
; CHECK-NOT: shl
; CHECK: and
; CHECK-NEXT: jmp

; External callee for the tail call in @g.
declare void @f(i64)