; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; Test high-part i64->i128 multiplications on arch15.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s

; Check zero-extended multiplication in which only the high part is used.
; Both operands are zero-extended to i128, multiplied, and bits 64..127 of
; the product are returned; the expected code is a single MLGR.
define i64 @f1(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $r3d killed $r3d def $r2q
; CHECK-NEXT:    mlgr %r2, %r4
; CHECK-NEXT:    # kill: def $r2d killed $r2d killed $r2q
; CHECK-NEXT:    br %r14
  %ax = zext i64 %a to i128
  %bx = zext i64 %b to i128
  %mulx = mul i128 %ax, %bx
  %highx = lshr i128 %mulx, 64
  %high = trunc i128 %highx to i64
  ret i64 %high
}

; Check sign-extended multiplication in which only the high part is used.
; Same shape as f1 but with sext operands; the expected code is MGRK.
define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mgrk %r2, %r3, %r4
; CHECK-NEXT:    # kill: def $r2d killed $r2d killed $r2q
; CHECK-NEXT:    br %r14
  %ax = sext i64 %a to i128
  %bx = sext i64 %b to i128
  %mulx = mul i128 %ax, %bx
  %highx = lshr i128 %mulx, 64
  %high = trunc i128 %highx to i64
  ret i64 %high
}

; Check zero-extended multiplication in which only part of the high half
; is used.  FIXME: Should use MLGR as well.
; The shift amount is 67 rather than 64, and the current expected output is
; the vector VMLQ sequence instead of MLGR.
define i64 @f3(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgbm %v0, 0
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vlvgg %v1, %r3, 1
; CHECK-NEXT:    vlvgg %v0, %r4, 1
; CHECK-NEXT:    vmlq %v0, %v1, %v0
; CHECK-NEXT:    vrepib %v1, 67
; CHECK-NEXT:    vsrlb %v0, %v0, %v1
; CHECK-NEXT:    vsrl %v0, %v0, %v1
; CHECK-NEXT:    vlgvg %r2, %v0, 1
; CHECK-NEXT:    br %r14
  %ax = zext i64 %a to i128
  %bx = zext i64 %b to i128
  %mulx = mul i128 %ax, %bx
  %highx = lshr i128 %mulx, 67
  %high = trunc i128 %highx to i64
  ret i64 %high
}

; Check zero-extended multiplication in which the result is split into
; high and low halves.
; FIXME: Should use MLGR as well.
; f4 ORs the high and low 64-bit halves of the zero-extended i128 product;
; the current expected output is the vector VMLQ sequence instead of MLGR.
define i64 @f4(i64 %dummy, i64 %a, i64 %b) {
; CHECK-LABEL: f4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgbm %v0, 0
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vlvgg %v1, %r3, 1
; CHECK-NEXT:    vlvgg %v0, %r4, 1
; CHECK-NEXT:    vmlq %v0, %v1, %v0
; CHECK-NEXT:    vrepib %v1, 64
; CHECK-NEXT:    vsrlb %v1, %v0, %v1
; CHECK-NEXT:    vo %v0, %v1, %v0
; CHECK-NEXT:    vlgvg %r2, %v0, 1
; CHECK-NEXT:    br %r14
  %ax = zext i64 %a to i128
  %bx = zext i64 %b to i128
  %mulx = mul i128 %ax, %bx
  %highx = lshr i128 %mulx, 64
  %high = trunc i128 %highx to i64
  %low = trunc i128 %mulx to i64
  %or = or i64 %high, %low
  ret i64 %or
}

; Check division by a constant, which should use multiplication instead.
; The expected code materializes a constant with LLIHF/OILF, multiplies with
; MLGR and then shifts the high part right by 9 with SRLG.
define i64 @f5(i64 %dummy, i64 %a) {
; CHECK-LABEL: f5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    llihf %r0, 1782028570
; CHECK-NEXT:    oilf %r0, 598650223
; CHECK-NEXT:    # kill: def $r3d killed $r3d def $r2q
; CHECK-NEXT:    mlgr %r2, %r0
; CHECK-NEXT:    srlg %r2, %r2, 9
; CHECK-NEXT:    br %r14
  %res = udiv i64 %a, 1234
  ret i64 %res
}

; Check MLG with no displacement.
; The second operand is loaded directly from %src, so the expected memory
; operand is 0(%r4).
define i64 @f6(i64 %dummy, i64 %a, ptr %src) {
; CHECK-LABEL: f6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $r3d killed $r3d def $r2q
; CHECK-NEXT:    mlg %r2, 0(%r4)
; CHECK-NEXT:    # kill: def $r2d killed $r2d killed $r2q
; CHECK-NEXT:    br %r14
  %b = load i64, ptr %src
  %ax = zext i64 %a to i128
  %bx = zext i64 %b to i128
  %mulx = mul i128 %ax, %bx
  %highx = lshr i128 %mulx, 64
  %high = trunc i128 %highx to i64
  ret i64 %high
}

; Check the high end of the aligned MLG range.
define i64 @f7(i64 %dummy, i64 %a, ptr %src) {
; CHECK-LABEL: f7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $r3d killed $r3d def $r2q
; CHECK-NEXT:    mlg %r2, 524280(%r4)
; CHECK-NEXT:    # kill: def $r2d killed $r2d killed $r2q
; CHECK-NEXT:    br %r14
  ; i64 element 65535 is byte offset 65535 * 8 = 524280, which the expected
  ; MLG folds in as its displacement.
  %ptr = getelementptr i64, ptr %src, i64 65535
  %b = load i64, ptr %ptr
  %ax = zext i64 %a to i128
  %bx = zext i64 %b to i128
  %mulx = mul i128 %ax, %bx
  %highx = lshr i128 %mulx, 64
  %high = trunc i128 %highx to i64
  ret i64 %high
}

; Check the next doubleword up, which requires separate address logic.
; Other sequences besides this one would be OK.
define i64 @f8(i64 %dummy, i64 %a, ptr %src) {
; CHECK-LABEL: f8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    agfi %r4, 524288
; CHECK-NEXT:    # kill: def $r3d killed $r3d def $r2q
; CHECK-NEXT:    mlg %r2, 0(%r4)
; CHECK-NEXT:    # kill: def $r2d killed $r2d killed $r2q
; CHECK-NEXT:    br %r14
  ; i64 element 65536 is byte offset 65536 * 8 = 524288; the expected code
  ; adds it to the base with AGFI and then uses a zero displacement.
  %ptr = getelementptr i64, ptr %src, i64 65536
  %b = load i64, ptr %ptr
  %ax = zext i64 %a to i128
  %bx = zext i64 %b to i128
  %mulx = mul i128 %ax, %bx
  %highx = lshr i128 %mulx, 64
  %high = trunc i128 %highx to i64
  ret i64 %high
}

; Check the high end of the negative aligned MLG range.
define i64 @f9(i64 %dummy, i64 %a, ptr %src) {
; CHECK-LABEL: f9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $r3d killed $r3d def $r2q
; CHECK-NEXT:    mlg %r2, -8(%r4)
; CHECK-NEXT:    # kill: def $r2d killed $r2d killed $r2q
; CHECK-NEXT:    br %r14
  ; i64 element -1 is byte offset -8, folded in as the MLG displacement.
  %ptr = getelementptr i64, ptr %src, i64 -1
  %b = load i64, ptr %ptr
  %ax = zext i64 %a to i128
  %bx = zext i64 %b to i128
  %mulx = mul i128 %ax, %bx
  %highx = lshr i128 %mulx, 64
  %high = trunc i128 %highx to i64
  ret i64 %high
}

; Check the low end of the MLG range.
define i64 @f10(i64 %dummy, i64 %a, ptr %src) {
; CHECK-LABEL: f10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $r3d killed $r3d def $r2q
; CHECK-NEXT:    mlg %r2, -524288(%r4)
; CHECK-NEXT:    # kill: def $r2d killed $r2d killed $r2q
; CHECK-NEXT:    br %r14
  ; i64 element -65536 is byte offset -65536 * 8 = -524288, which the
  ; expected MLG folds in as its displacement.
  %ptr = getelementptr i64, ptr %src, i64 -65536
  %b = load i64, ptr %ptr
  %ax = zext i64 %a to i128
  %bx = zext i64 %b to i128
  %mulx = mul i128 %ax, %bx
  %highx = lshr i128 %mulx, 64
  %high = trunc i128 %highx to i64
  ret i64 %high
}

; Check the next doubleword down, which needs separate address logic.
; Other sequences besides this one would be OK.
define i64 @f11(ptr %dest, i64 %a, ptr %src) {
; CHECK-LABEL: f11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    agfi %r4, -524296
; CHECK-NEXT:    # kill: def $r3d killed $r3d def $r2q
; CHECK-NEXT:    mlg %r2, 0(%r4)
; CHECK-NEXT:    # kill: def $r2d killed $r2d killed $r2q
; CHECK-NEXT:    br %r14
  ; i64 element -65537 is byte offset -65537 * 8 = -524296; the expected code
  ; adds it to the base with AGFI and then uses a zero displacement.
  %ptr = getelementptr i64, ptr %src, i64 -65537
  %b = load i64, ptr %ptr
  %ax = zext i64 %a to i128
  %bx = zext i64 %b to i128
  %mulx = mul i128 %ax, %bx
  %highx = lshr i128 %mulx, 64
  %high = trunc i128 %highx to i64
  ret i64 %high
}

; Check that MLG allows an index.
; The address is %src + %index + 524287; the expected MLG uses both registers
; plus the constant as base, index and displacement.
define i64 @f12(ptr %dest, i64 %a, i64 %src, i64 %index) {
; CHECK-LABEL: f12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $r3d killed $r3d def $r2q
; CHECK-NEXT:    mlg %r2, 524287(%r5,%r4)
; CHECK-NEXT:    # kill: def $r2d killed $r2d killed $r2q
; CHECK-NEXT:    br %r14
  %add1 = add i64 %src, %index
  %add2 = add i64 %add1, 524287
  %ptr = inttoptr i64 %add2 to ptr
  %b = load i64, ptr %ptr
  %ax = zext i64 %a to i128
  %bx = zext i64 %b to i128
  %mulx = mul i128 %ax, %bx
  %highx = lshr i128 %mulx, 64
  %high = trunc i128 %highx to i64
  ret i64 %high
}