; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Full 64 x 64 -> 128 bit unsigned multiply, written as four partial products
; of the 32-bit halves of %x and %y. Returns { low 64 bits, high 64 bits }.
; The expected output keeps the expanded sequence; the only difference from
; the input is the `disjoint` flag on the final `or`.
define { i64, i64 } @mul_full_64_variant0(i64 %x, i64 %y) {
; CHECK-LABEL: @mul_full_64_variant0(
; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X:%.*]], 4294967295
; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295
; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[U1LS]], [[T0L]]
; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } poison, i64 [[LO]], 0
; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
; CHECK-NEXT:    ret { i64, i64 } [[RES]]
;
  %xl = and i64 %x, 4294967295
  %xh = lshr i64 %x, 32
  %yl = and i64 %y, 4294967295
  %yh = lshr i64 %y, 32

  %t0 = mul nuw i64 %yl, %xl
  %t1 = mul nuw i64 %yl, %xh
  %t2 = mul nuw i64 %yh, %xl
  %t3 = mul nuw i64 %yh, %xh

  %t0l = and i64 %t0, 4294967295
  %t0h = lshr i64 %t0, 32

  %u0 = add i64 %t0h, %t1
  %u0l = and i64 %u0, 4294967295
  %u0h = lshr i64 %u0, 32

  %u1 = add i64 %u0l, %t2
  %u1ls = shl i64 %u1, 32
  %u1h = lshr i64 %u1, 32

  %u2 = add i64 %u0h, %t3

  %lo = or i64 %u1ls, %t0l
  %hi = add i64 %u2, %u1h

  %res_lo = insertvalue { i64, i64 } poison, i64 %lo, 0
  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
  ret { i64, i64 } %res
}

; The following variants 1 - 3 are generated with this C++ program:
;
; #include <stdint.h>
;
; uint64_t mulxu(uint64_t a, uint64_t b, uint64_t *rhi) {
;   auto hi = [](uint64_t x) { return x >> 32; };
;   auto lo = [](uint64_t x) { return uint32_t(x); };
;   uint64_t xl = lo(a);
;   uint64_t xh = hi(a);
;   uint64_t yl = lo(b);
;   uint64_t yh = hi(b);
;
;   uint64_t rhh = xh * yh;
;   uint64_t rhl = xh * yl;
;   uint64_t rlh = xl * yh;
;   uint64_t rll = xl * yl;
;
;   *rhi = rhh + hi(rhl + hi(rll)) + hi((rlh + lo(rhl + hi(rll))));
; #if ONE
;   return a*b;
; #elif TWO
;   return (uint64_t(lo(rlh + lo(rhl + hi(rll)))) << 32) + lo(rll);
; #elif THREE
;   return ((rlh + rhl) << 32) + rll;
; #endif
; }

; Variant 1 (ONE): the high half is computed from the partial products and
; stored through %rhi; the low half is a plain `mul i64 %b, %a`.
; The expected output is the same instruction sequence as the input.
define i64 @mul_full_64_variant1(i64 %a, i64 %b, ptr nocapture %rhi) {
; CHECK-LABEL: @mul_full_64_variant1(
; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
; CHECK-NEXT:    [[SHR_I43:%.*]] = lshr i64 [[A]], 32
; CHECK-NEXT:    [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295
; CHECK-NEXT:    [[SHR_I41:%.*]] = lshr i64 [[B]], 32
; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i64 [[SHR_I41]], [[SHR_I43]]
; CHECK-NEXT:    [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I43]]
; CHECK-NEXT:    [[MUL6:%.*]] = mul nuw i64 [[SHR_I41]], [[CONV]]
; CHECK-NEXT:    [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]]
; CHECK-NEXT:    [[SHR_I40:%.*]] = lshr i64 [[MUL7]], 32
; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[SHR_I40]], [[MUL5]]
; CHECK-NEXT:    [[SHR_I39:%.*]] = lshr i64 [[ADD]], 32
; CHECK-NEXT:    [[ADD10:%.*]] = add i64 [[SHR_I39]], [[MUL]]
; CHECK-NEXT:    [[CONV14:%.*]] = and i64 [[ADD]], 4294967295
; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
; CHECK-NEXT:    [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32
; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]]
; CHECK-NEXT:    store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
; CHECK-NEXT:    [[MULLO:%.*]] = mul i64 [[B]], [[A]]
; CHECK-NEXT:    ret i64 [[MULLO]]
;
  %conv = and i64 %a, 4294967295
  %shr.i43 = lshr i64 %a, 32
  %conv3 = and i64 %b, 4294967295
  %shr.i41 = lshr i64 %b, 32
  %mul = mul nuw i64 %shr.i41, %shr.i43
  %mul5 = mul nuw i64 %conv3, %shr.i43
  %mul6 = mul nuw i64 %shr.i41, %conv
  %mul7 = mul nuw i64 %conv3, %conv
  %shr.i40 = lshr i64 %mul7, 32
  %add = add i64 %shr.i40, %mul5
  %shr.i39 = lshr i64 %add, 32
  %add10 = add i64 %shr.i39, %mul
  %conv14 = and i64 %add, 4294967295
  %add15 = add i64 %conv14, %mul6
  %shr.i = lshr i64 %add15, 32
  %add17 = add i64 %add10, %shr.i
  store i64 %add17, ptr %rhi, align 8
  %mullo = mul i64 %b, %a
  ret i64 %mullo
}

; Variant 2 (TWO): the high half is stored through %rhi; the low half is
; rebuilt as (%add15 << 32) | (low 32 bits of %mul7).
; The only expected change is the `disjoint` flag on the final `or`.
define i64 @mul_full_64_variant2(i64 %a, i64 %b, ptr nocapture %rhi) {
; CHECK-LABEL: @mul_full_64_variant2(
; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
; CHECK-NEXT:    [[SHR_I58:%.*]] = lshr i64 [[A]], 32
; CHECK-NEXT:    [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295
; CHECK-NEXT:    [[SHR_I56:%.*]] = lshr i64 [[B]], 32
; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i64 [[SHR_I56]], [[SHR_I58]]
; CHECK-NEXT:    [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I58]]
; CHECK-NEXT:    [[MUL6:%.*]] = mul nuw i64 [[SHR_I56]], [[CONV]]
; CHECK-NEXT:    [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]]
; CHECK-NEXT:    [[SHR_I55:%.*]] = lshr i64 [[MUL7]], 32
; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[SHR_I55]], [[MUL5]]
; CHECK-NEXT:    [[SHR_I54:%.*]] = lshr i64 [[ADD]], 32
; CHECK-NEXT:    [[ADD10:%.*]] = add i64 [[SHR_I54]], [[MUL]]
; CHECK-NEXT:    [[CONV14:%.*]] = and i64 [[ADD]], 4294967295
; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
; CHECK-NEXT:    [[SHR_I51:%.*]] = lshr i64 [[ADD15]], 32
; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I51]]
; CHECK-NEXT:    store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
; CHECK-NEXT:    [[CONV24:%.*]] = shl i64 [[ADD15]], 32
; CHECK-NEXT:    [[CONV26:%.*]] = and i64 [[MUL7]], 4294967295
; CHECK-NEXT:    [[ADD27:%.*]] = or disjoint i64 [[CONV24]], [[CONV26]]
; CHECK-NEXT:    ret i64 [[ADD27]]
;
  %conv = and i64 %a, 4294967295
  %shr.i58 = lshr i64 %a, 32
  %conv3 = and i64 %b, 4294967295
  %shr.i56 = lshr i64 %b, 32
  %mul = mul nuw i64 %shr.i56, %shr.i58
  %mul5 = mul nuw i64 %conv3, %shr.i58
  %mul6 = mul nuw i64 %shr.i56, %conv
  %mul7 = mul nuw i64 %conv3, %conv
  %shr.i55 = lshr i64 %mul7, 32
  %add = add i64 %shr.i55, %mul5
  %shr.i54 = lshr i64 %add, 32
  %add10 = add i64 %shr.i54, %mul
  %conv14 = and i64 %add, 4294967295
  %add15 = add i64 %conv14, %mul6
  %shr.i51 = lshr i64 %add15, 32
  %add17 = add i64 %add10, %shr.i51
  store i64 %add17, ptr %rhi, align 8
  %conv24 = shl i64 %add15, 32
  %conv26 = and i64 %mul7, 4294967295
  %add27 = or i64 %conv24, %conv26
  ret i64 %add27
}

; Negative test case for mul_fold function: MUL7 is used in more than one place
define i64 @mul_full_64_variant3(i64 %a, i64 %b, ptr nocapture %rhi) {
; CHECK-LABEL: @mul_full_64_variant3(
; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
; CHECK-NEXT:    [[SHR_I45:%.*]] = lshr i64 [[A]], 32
; CHECK-NEXT:    [[CONV3:%.*]] = and i64 [[B:%.*]], 4294967295
; CHECK-NEXT:    [[SHR_I43:%.*]] = lshr i64 [[B]], 32
; CHECK-NEXT:    [[MUL:%.*]] = mul nuw i64 [[SHR_I43]], [[SHR_I45]]
; CHECK-NEXT:    [[MUL5:%.*]] = mul nuw i64 [[CONV3]], [[SHR_I45]]
; CHECK-NEXT:    [[MUL6:%.*]] = mul nuw i64 [[SHR_I43]], [[CONV]]
; CHECK-NEXT:    [[MUL7:%.*]] = mul nuw i64 [[CONV3]], [[CONV]]
; CHECK-NEXT:    [[SHR_I42:%.*]] = lshr i64 [[MUL7]], 32
; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[SHR_I42]], [[MUL5]]
; CHECK-NEXT:    [[SHR_I41:%.*]] = lshr i64 [[ADD]], 32
; CHECK-NEXT:    [[ADD10:%.*]] = add i64 [[SHR_I41]], [[MUL]]
; CHECK-NEXT:    [[CONV14:%.*]] = and i64 [[ADD]], 4294967295
; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
; CHECK-NEXT:    [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32
; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]]
; CHECK-NEXT:    store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
; CHECK-NEXT:    [[ADD18:%.*]] = add i64 [[MUL6]], [[MUL5]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[ADD18]], 32
; CHECK-NEXT:    [[ADD19:%.*]] = add i64 [[SHL]], [[MUL7]]
; CHECK-NEXT:    ret i64 [[ADD19]]
;
  %conv = and i64 %a, 4294967295
  %shr.i45 = lshr i64 %a, 32
  %conv3 = and i64 %b, 4294967295
  %shr.i43 = lshr i64 %b, 32
  %mul = mul nuw i64 %shr.i43, %shr.i45
  %mul5 = mul nuw i64 %conv3, %shr.i45
  %mul6 = mul nuw i64 %shr.i43, %conv
  %mul7 = mul nuw i64 %conv3, %conv
  %shr.i42 = lshr i64 %mul7, 32
  %add = add i64 %shr.i42, %mul5
  %shr.i41 = lshr i64 %add, 32
  %add10 = add i64 %shr.i41, %mul
  %conv14 = and i64 %add, 4294967295
  %add15 = add i64 %conv14, %mul6
  %shr.i = lshr i64 %add15, 32
  %add17 = add i64 %add10, %shr.i
  store i64 %add17, ptr %rhi, align 8
  %add18 = add i64 %mul6, %mul5
  %shl = shl i64 %add18, 32
  %add19 = add i64 %shl, %mul7
  ret i64 %add19
}


; Same shape as @mul_full_64_variant0, at i32 width with 16-bit halves
; (mask 65535, shift 16). Returns { low 32 bits, high 32 bits }.
define { i32, i32 } @mul_full_32(i32 %x, i32 %y) {
; CHECK-LABEL: @mul_full_32(
; CHECK-NEXT:    [[XL:%.*]] = and i32 [[X:%.*]], 65535
; CHECK-NEXT:    [[XH:%.*]] = lshr i32 [[X]], 16
; CHECK-NEXT:    [[YL:%.*]] = and i32 [[Y:%.*]], 65535
; CHECK-NEXT:    [[YH:%.*]] = lshr i32 [[Y]], 16
; CHECK-NEXT:    [[T0:%.*]] = mul nuw i32 [[YL]], [[XL]]
; CHECK-NEXT:    [[T1:%.*]] = mul nuw i32 [[YL]], [[XH]]
; CHECK-NEXT:    [[T2:%.*]] = mul nuw i32 [[YH]], [[XL]]
; CHECK-NEXT:    [[T3:%.*]] = mul nuw i32 [[YH]], [[XH]]
; CHECK-NEXT:    [[T0L:%.*]] = and i32 [[T0]], 65535
; CHECK-NEXT:    [[T0H:%.*]] = lshr i32 [[T0]], 16
; CHECK-NEXT:    [[U0:%.*]] = add i32 [[T0H]], [[T1]]
; CHECK-NEXT:    [[U0L:%.*]] = and i32 [[U0]], 65535
; CHECK-NEXT:    [[U0H:%.*]] = lshr i32 [[U0]], 16
; CHECK-NEXT:    [[U1:%.*]] = add i32 [[U0L]], [[T2]]
; CHECK-NEXT:    [[U1LS:%.*]] = shl i32 [[U1]], 16
; CHECK-NEXT:    [[U1H:%.*]] = lshr i32 [[U1]], 16
; CHECK-NEXT:    [[U2:%.*]] = add i32 [[U0H]], [[T3]]
; CHECK-NEXT:    [[LO:%.*]] = or disjoint i32 [[U1LS]], [[T0L]]
; CHECK-NEXT:    [[HI:%.*]] = add i32 [[U2]], [[U1H]]
; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i32, i32 } poison, i32 [[LO]], 0
; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i32, i32 } [[RES_LO]], i32 [[HI]], 1
; CHECK-NEXT:    ret { i32, i32 } [[RES]]
;
  %xl = and i32 %x, 65535
  %xh = lshr i32 %x, 16
  %yl = and i32 %y, 65535
  %yh = lshr i32 %y, 16

  %t0 = mul nuw i32 %yl, %xl
  %t1 = mul nuw i32 %yl, %xh
  %t2 = mul nuw i32 %yh, %xl
  %t3 = mul nuw i32 %yh, %xh

  %t0l = and i32 %t0, 65535
  %t0h = lshr i32 %t0, 16

  %u0 = add i32 %t0h, %t1
  %u0l = and i32 %u0, 65535
  %u0h = lshr i32 %u0, 16

  %u1 = add i32 %u0l, %t2
  %u1ls = shl i32 %u1, 16
  %u1h = lshr i32 %u1, 16

  %u2 = add i32 %u0h, %t3

  %lo = or i32 %u1ls, %t0l
  %hi = add i32 %u2, %u1h

  %res_lo = insertvalue { i32, i32 } poison, i32 %lo, 0
  %res = insertvalue { i32, i32 } %res_lo, i32 %hi, 1
  ret { i32, i32 } %res
}


; Opaque source of i64 values; lets later tests use instruction results
; instead of function arguments as the multiplication operands.
declare i64 @get_number()

; In the following test cases %x and %y are instructions, not arguments.
; This tests the placement of mul i128 and zexts.
; Instructions are also shuffled.
; Full 64 x 64 -> 128 bit multiply where both operands are results of calls
; to @get_number (unnamed values %1 and %2) and the partial-product
; instructions appear in a shuffled order.
define { i64, i64 } @mul_full_64_variant0_1() {
; CHECK-LABEL: @mul_full_64_variant0_1(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @get_number()
; CHECK-NEXT:    [[YL:%.*]] = and i64 [[TMP1]], 4294967295
; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[TMP1]], 32
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @get_number()
; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[TMP2]], 32
; CHECK-NEXT:    [[XL:%.*]] = and i64 [[TMP2]], 4294967295
; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[U1LS]], [[T0L]]
; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } poison, i64 [[LO]], 0
; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
; CHECK-NEXT:    ret { i64, i64 } [[RES]]
;
  %1 = call i64 @get_number()
  %yl = and i64 %1, 4294967295
  %yh = lshr i64 %1, 32

  %2 = call i64 @get_number()
  %xh = lshr i64 %2, 32
  %xl = and i64 %2, 4294967295

  %t1 = mul nuw i64 %yl, %xh
  %t3 = mul nuw i64 %yh, %xh
  %t2 = mul nuw i64 %yh, %xl
  %t0 = mul nuw i64 %yl, %xl

  %t0h = lshr i64 %t0, 32
  %u0 = add i64 %t0h, %t1
  %u0l = and i64 %u0, 4294967295
  %u1 = add i64 %u0l, %t2
  %u0h = lshr i64 %u0, 32
  %u2 = add i64 %u0h, %t3
  %u1h = lshr i64 %u1, 32
  %hi = add i64 %u2, %u1h

  %u1ls = shl i64 %u1, 32
  %t0l = and i64 %t0, 4294967295
  %lo = or i64 %u1ls, %t0l

  %res_lo = insertvalue { i64, i64 } poison, i64 %lo, 0
  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
  ret { i64, i64 } %res
}

; Like @mul_full_64_variant0_1, but both calls come first and the operands of
; the commutative mul/add/or instructions are swapped.
define { i64, i64 } @mul_full_64_variant0_2() {
; CHECK-LABEL: @mul_full_64_variant0_2(
; CHECK-NEXT:    [[X:%.*]] = call i64 @get_number()
; CHECK-NEXT:    [[Y:%.*]] = call i64 @get_number()
; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]]
; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]]
; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]]
; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]]
; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T1]], [[T0H]]
; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
; CHECK-NEXT:    [[U1:%.*]] = add i64 [[T2]], [[U0L]]
; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U1H]], [[U2]]
; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[T0L]], [[U1LS]]
; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } poison, i64 [[LO]], 0
; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
; CHECK-NEXT:    ret { i64, i64 } [[RES]]
;
  %x = call i64 @get_number()
  %y = call i64 @get_number()

  %yl = and i64 %y, 4294967295
  %yh = lshr i64 %y, 32
  %xh = lshr i64 %x, 32
  %xl = and i64 %x, 4294967295

  %t3 = mul nuw i64 %xh, %yh
  %t2 = mul nuw i64 %xl, %yh
  %t1 = mul nuw i64 %xh, %yl
  %t0 = mul nuw i64 %xl, %yl

  %t0h = lshr i64 %t0, 32
  %u0 = add i64 %t1, %t0h
  %u0l = and i64 %u0, 4294967295
  %u1 = add i64 %t2, %u0l
  %u0h = lshr i64 %u0, 32
  %u2 = add i64 %u0h, %t3
  %u1h = lshr i64 %u1, 32
  %hi = add i64 %u1h, %u2

  %u1ls = shl i64 %u1, 32
  %t0l = and i64 %t0, 4294967295
  %lo = or i64 %t0l, %u1ls

  %res_lo = insertvalue { i64, i64 } poison, i64 %lo, 0
  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
  ret { i64, i64 } %res
}


; High half only of the 64 x 64 multiply (no low-half reconstruction).
; The expected output is the same instruction sequence as the input.
define i64 @umulh_64(i64 %x, i64 %y) {
; CHECK-LABEL: @umulh_64(
; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X:%.*]], 4294967295
; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295
; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
; CHECK-NEXT:    ret i64 [[HI]]
;
  %xl = and i64 %x, 4294967295
  %xh = lshr i64 %x, 32
  %yl = and i64 %y, 4294967295
  %yh = lshr i64 %y, 32

  %t0 = mul nuw i64 %yl, %xl
  %t1 = mul nuw i64 %yl, %xh
  %t2 = mul nuw i64 %yh, %xl
  %t3 = mul nuw i64 %yh, %xh

  %t0h = lshr i64 %t0, 32

  %u0 = add i64 %t0h, %t1
  %u0l = and i64 %u0, 4294967295
  %u0h = lshr i64 %u0, 32

  %u1 = add i64 %u0l, %t2
  %u1h = lshr i64 %u1, 32

  %u2 = add i64 %u0h, %t3

  %hi = add i64 %u2, %u1h
  ret i64 %hi
}

; Low half only, built from three partial products.
; The expected output drops some masks (%t1 and %t2 multiply by the unmasked
; %y and %x, and the %u0l mask is gone) but still keeps the expanded sequence.
; NOTE(review): the TODO below presumably tracks folding this to a single
; `mul i64` - confirm against the linked proof.
; TODO: https://alive2.llvm.org/ce/z/y26zaW
define i64 @mullo(i64 %x, i64 %y) {
; CHECK-LABEL: @mullo(
; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X:%.*]], 4294967295
; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y:%.*]], 4294967295
; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
; CHECK-NEXT:    [[T1:%.*]] = mul i64 [[Y]], [[XH]]
; CHECK-NEXT:    [[T2:%.*]] = mul i64 [[YH]], [[X]]
; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0]], [[T2]]
; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[U1LS]], [[T0L]]
; CHECK-NEXT:    ret i64 [[LO]]
;
  %xl = and i64 %x, 4294967295
  %xh = lshr i64 %x, 32
  %yl = and i64 %y, 4294967295
  %yh = lshr i64 %y, 32

  %t0 = mul nuw i64 %yl, %xl
  %t1 = mul nuw i64 %yl, %xh
  %t2 = mul nuw i64 %yh, %xl

  %t0l = and i64 %t0, 4294967295
  %t0h = lshr i64 %t0, 32

  %u0 = add i64 %t0h, %t1
  %u0l = and i64 %u0, 4294967295

  %u1 = add i64 %u0l, %t2
  %u1ls = shl i64 %u1, 32

  %lo = or i64 %u1ls, %t0l
  ret i64 %lo
}


; Low half written as ((t2 + t1) << 32) + t0.
; The expected output is a single `mul i64 %a, %b`.
define i64 @mullo_variant3(i64 %a, i64 %b) {
; CHECK-LABEL: @mullo_variant3(
; CHECK-NEXT:    [[LO:%.*]] = mul i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret i64 [[LO]]
;
  %al = and i64 %a, 4294967295
  %ah = lshr i64 %a, 32
  %bl = and i64 %b, 4294967295
  %bh = lshr i64 %b, 32

  %t0 = mul nuw i64 %bl, %al
  %t1 = mul nuw i64 %bl, %ah
  %t2 = mul nuw i64 %bh, %al

  %u1 = add i64 %t2, %t1
  %u1ls = shl i64 %u1, 32

  %lo = add i64 %u1ls, %t0
  ret i64 %lo
}


; External sinks that give the pre-existing multiplies below a use.
declare void @eat_i64(i64)
declare void @eat_i128(i128)

; Same low-half sequence as @mullo, preceded by an already existing
; `mul i64 %x, %y` whose result is passed to @eat_i64.
; The expected output keeps both the existing mul and the expanded sequence.
define i64 @mullo_duplicate(i64 %x, i64 %y) {
; CHECK-LABEL: @mullo_duplicate(
; CHECK-NEXT:    [[DUPLICATED_MUL:%.*]] = mul i64 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    call void @eat_i64(i64 [[DUPLICATED_MUL]])
; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
; CHECK-NEXT:    [[T1:%.*]] = mul i64 [[Y]], [[XH]]
; CHECK-NEXT:    [[T2:%.*]] = mul i64 [[YH]], [[X]]
; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0]], [[T2]]
; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[U1LS]], [[T0L]]
; CHECK-NEXT:    ret i64 [[LO]]
;
  %duplicated_mul = mul i64 %x, %y
  call void @eat_i64(i64 %duplicated_mul)

  %xl = and i64 %x, 4294967295
  %xh = lshr i64 %x, 32
  %yl = and i64 %y, 4294967295
  %yh = lshr i64 %y, 32

  %t0 = mul nuw i64 %yl, %xl
  %t1 = mul nuw i64 %yl, %xh
  %t2 = mul nuw i64 %yh, %xl

  %t0l = and i64 %t0, 4294967295
  %t0h = lshr i64 %t0, 32

  %u0 = add i64 %t0h, %t1
  %u0l = and i64 %u0, 4294967295

  %u1 = add i64 %u0l, %t2
  %u1ls = shl i64 %u1, 32

  %lo = or i64 %u1ls, %t0l
  ret i64 %lo
}

; Same full-multiply sequence as @mul_full_64_variant0, preceded by an already
; existing i128 multiply of the zero-extended operands (passed to @eat_i128).
; The expected output adds `nuw` to the i128 mul and keeps the expanded
; sequence alongside it.
define { i64, i64 } @mul_full_64_duplicate(i64 %x, i64 %y) {
; CHECK-LABEL: @mul_full_64_duplicate(
; CHECK-NEXT:    [[XX:%.*]] = zext i64 [[X:%.*]] to i128
; CHECK-NEXT:    [[YY:%.*]] = zext i64 [[Y:%.*]] to i128
; CHECK-NEXT:    [[DUPLICATED_MUL:%.*]] = mul nuw i128 [[XX]], [[YY]]
; CHECK-NEXT:    call void @eat_i128(i128 [[DUPLICATED_MUL]])
; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[YL]], [[XL]]
; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[YL]], [[XH]]
; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[YH]], [[XL]]
; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[YH]], [[XH]]
; CHECK-NEXT:    [[T0L:%.*]] = and i64 [[T0]], 4294967295
; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T0H]], [[T1]]
; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
; CHECK-NEXT:    [[U1:%.*]] = add i64 [[U0L]], [[T2]]
; CHECK-NEXT:    [[U1LS:%.*]] = shl i64 [[U1]], 32
; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
; CHECK-NEXT:    [[LO:%.*]] = or disjoint i64 [[U1LS]], [[T0L]]
; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U2]], [[U1H]]
; CHECK-NEXT:    [[RES_LO:%.*]] = insertvalue { i64, i64 } poison, i64 [[LO]], 0
; CHECK-NEXT:    [[RES:%.*]] = insertvalue { i64, i64 } [[RES_LO]], i64 [[HI]], 1
; CHECK-NEXT:    ret { i64, i64 } [[RES]]
;
  %xx = zext i64 %x to i128
  %yy = zext i64 %y to i128
  %duplicated_mul = mul i128 %xx, %yy
  call void @eat_i128(i128 %duplicated_mul)

  %xl = and i64 %x, 4294967295
  %xh = lshr i64 %x, 32
  %yl = and i64 %y, 4294967295
  %yh = lshr i64 %y, 32

  %t0 = mul nuw i64 %yl, %xl
  %t1 = mul nuw i64 %yl, %xh
  %t2 = mul nuw i64 %yh, %xl
  %t3 = mul nuw i64 %yh, %xh

  %t0l = and i64 %t0, 4294967295
  %t0h = lshr i64 %t0, 32

  %u0 = add i64 %t0h, %t1
  %u0l = and i64 %u0, 4294967295
  %u0h = lshr i64 %u0, 32

  %u1 = add i64 %u0l, %t2
  %u1ls = shl i64 %u1, 32
  %u1h = lshr i64 %u1, 32

  %u2 = add i64 %u0h, %t3

  %lo = or i64 %u1ls, %t0l
  %hi = add i64 %u2, %u1h

  %res_lo = insertvalue { i64, i64 } poison, i64 %lo, 0
  %res = insertvalue { i64, i64 } %res_lo, i64 %hi, 1
  ret { i64, i64 } %res
}


; High half only; operands are call results, both calls come first, and the
; instructions and commutative operands are shuffled.
define i64 @umulhi_64_v2() {
; CHECK-LABEL: @umulhi_64_v2(
; CHECK-NEXT:    [[X:%.*]] = call i64 @get_number()
; CHECK-NEXT:    [[Y:%.*]] = call i64 @get_number()
; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]]
; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]]
; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]]
; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]]
; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T1]], [[T0H]]
; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
; CHECK-NEXT:    [[U1:%.*]] = add i64 [[T2]], [[U0L]]
; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U1H]], [[U2]]
; CHECK-NEXT:    ret i64 [[HI]]
;
  %x = call i64 @get_number()
  %y = call i64 @get_number()

  %yl = and i64 %y, 4294967295
  %yh = lshr i64 %y, 32
  %xh = lshr i64 %x, 32
  %xl = and i64 %x, 4294967295

  %t3 = mul nuw i64 %xh, %yh
  %t2 = mul nuw i64 %xl, %yh
  %t1 = mul nuw i64 %xh, %yl
  %t0 = mul nuw i64 %xl, %yl

  %t0h = lshr i64 %t0, 32
  %u0 = add i64 %t1, %t0h
  %u0l = and i64 %u0, 4294967295
  %u1 = add i64 %t2, %u0l
  %u0h = lshr i64 %u0, 32
  %u2 = add i64 %u0h, %t3
  %u1h = lshr i64 %u1, 32
  %hi = add i64 %u1h, %u2

  ret i64 %hi
}


; Like @umulhi_64_v2, but the halves of %x are extracted before the second
; call to @get_number.
define i64 @umulhi_64_v3() {
; CHECK-LABEL: @umulhi_64_v3(
; CHECK-NEXT:    [[X:%.*]] = call i64 @get_number()
; CHECK-NEXT:    [[XH:%.*]] = lshr i64 [[X]], 32
; CHECK-NEXT:    [[XL:%.*]] = and i64 [[X]], 4294967295
; CHECK-NEXT:    [[Y:%.*]] = call i64 @get_number()
; CHECK-NEXT:    [[YL:%.*]] = and i64 [[Y]], 4294967295
; CHECK-NEXT:    [[YH:%.*]] = lshr i64 [[Y]], 32
; CHECK-NEXT:    [[T3:%.*]] = mul nuw i64 [[XH]], [[YH]]
; CHECK-NEXT:    [[T2:%.*]] = mul nuw i64 [[XL]], [[YH]]
; CHECK-NEXT:    [[T1:%.*]] = mul nuw i64 [[XH]], [[YL]]
; CHECK-NEXT:    [[T0:%.*]] = mul nuw i64 [[XL]], [[YL]]
; CHECK-NEXT:    [[T0H:%.*]] = lshr i64 [[T0]], 32
; CHECK-NEXT:    [[U0:%.*]] = add i64 [[T1]], [[T0H]]
; CHECK-NEXT:    [[U0L:%.*]] = and i64 [[U0]], 4294967295
; CHECK-NEXT:    [[U1:%.*]] = add i64 [[T2]], [[U0L]]
; CHECK-NEXT:    [[U0H:%.*]] = lshr i64 [[U0]], 32
; CHECK-NEXT:    [[U2:%.*]] = add i64 [[U0H]], [[T3]]
; CHECK-NEXT:    [[U1H:%.*]] = lshr i64 [[U1]], 32
; CHECK-NEXT:    [[HI:%.*]] = add i64 [[U1H]], [[U2]]
; CHECK-NEXT:    ret i64 [[HI]]
;
  %x = call i64 @get_number()
  %xh = lshr i64 %x, 32
  %xl = and i64 %x, 4294967295

  %y = call i64 @get_number()
  %yl = and i64 %y, 4294967295
  %yh = lshr i64 %y, 32

  %t3 = mul nuw i64 %xh, %yh
  %t2 = mul nuw i64 %xl, %yh
  %t1 = mul nuw i64 %xh, %yl
  %t0 = mul nuw i64 %xl, %yl

  %t0h = lshr i64 %t0, 32
  %u0 = add i64 %t1, %t0h
  %u0l = and i64 %u0, 4294967295
  %u1 = add i64 %t2, %u0l
  %u0h = lshr i64 %u0, 32
  %u2 = add i64 %u0h, %t3
  %u1h = lshr i64 %u1, 32
  %hi = add i64 %u1h, %u2

  ret i64 %hi
}