; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

declare i8 @use8(i8)
declare i16 @use16(i16)
declare i32 @use32(i32)
declare i64 @use64(i64)
declare i128 @use128(i128)
declare i130 @use130(i130)
declare <2 x i8> @use_v2i8(<2 x i8>)

; These tests check that a decomposed "schoolbook" low-half multiply,
;   (lo(a)*hi(b) + hi(a)*lo(b)) << half | lo(a)*lo(b)
; is folded back into a single full-width mul of the original operands.

; The following 16 cases are used to cover the commuted operands of ADD and MUL,
; with extra uses added to some of these tests to exercise those cases.
; The different _Ax suffix hints the variety of combinations MUL
; The different _Bx suffix hints the variety of combinations ADD
; 4 tests that use in0/in1 with different commutes
define i8 @mul8_low_A0_B0(i8 %in0, i8 %in1) {
; CHECK-LABEL: @mul8_low_A0_B0(
; CHECK-NEXT:    [[RETLO:%.*]] = mul i8 [[IN0:%.*]], [[IN1:%.*]]
; CHECK-NEXT:    ret i8 [[RETLO]]
;
  %In0Lo = and i8 %in0, 15
  %In0Hi = lshr i8 %in0, 4
  %In1Lo = and i8 %in1, 15
  %In1Hi = lshr i8 %in1, 4
  %m10 = mul i8 %In1Hi, %in0
  %m01 = mul i8 %In0Hi, %in1
  %m00 = mul i8 %In1Lo, %In0Lo
  %addc = add i8 %m10, %m01
  %shl = shl i8 %addc, 4
  %retLo = add i8 %shl, %m00
  ret i8 %retLo
}

define i8 @mul8_low_A0_B1(i8 %p, i8 %in1) {
; CHECK-LABEL: @mul8_low_A0_B1(
; CHECK-NEXT:    [[IN0:%.*]] = call i8 @use8(i8 [[P:%.*]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i8 [[IN0]], [[IN1:%.*]]
; CHECK-NEXT:    ret i8 [[RETLO]]
;
  %in0 = call i8 @use8(i8 %p) ; thwart complexity-based canonicalization
  %In0Lo = and i8 %in0, 15
  %In0Hi = lshr i8 %in0, 4
  %In1Lo = and i8 %in1, 15
  %In1Hi = lshr i8 %in1, 4
  %m10 = mul i8 %in0, %In1Hi
  %m01 = mul i8 %In0Hi, %in1
  %m00 = mul i8 %In1Lo, %In0Lo
  %addc = add i8 %m10, %m01
  %shl = shl i8 %addc, 4
  %retLo = add i8 %m00, %shl
  ret i8 %retLo
}

define i8 @mul8_low_A0_B2(i8 %in0, i8 %p) {
; CHECK-LABEL: @mul8_low_A0_B2(
; CHECK-NEXT:    [[IN1:%.*]] = call i8 @use8(i8 [[P:%.*]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i8 [[IN0:%.*]], [[IN1]]
; CHECK-NEXT:    ret i8 [[RETLO]]
;

  %in1 = call i8 @use8(i8 %p) ; thwart complexity-based canonicalization
  %In0Lo = and i8 %in0, 15
  %In0Hi = lshr i8 %in0, 4
  %In1Lo = and i8 %in1, 15
  %In1Hi = lshr i8 %in1, 4
  %m10 = mul i8 %In1Hi, %in0
  %m01 = mul i8 %in1, %In0Hi
  %m00 = mul i8 %In1Lo, %In0Lo
  %addc = add i8 %m01, %m10
  %shl = shl i8 %addc, 4
  %retLo = add i8 %shl, %m00
  ret i8 %retLo
}

define i8 @mul8_low_A0_B3(i8 %p, i8 %q) {
; CHECK-LABEL: @mul8_low_A0_B3(
; CHECK-NEXT:    [[IN0:%.*]] = call i8 @use8(i8 [[P:%.*]])
; CHECK-NEXT:    [[IN1:%.*]] = call i8 @use8(i8 [[Q:%.*]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i8 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i8 [[RETLO]]
;
  %in0 = call i8 @use8(i8 %p) ; thwart complexity-based canonicalization
  %in1 = call i8 @use8(i8 %q) ; thwart complexity-based canonicalization
  %In0Lo = and i8 %in0, 15
  %In0Hi = lshr i8 %in0, 4
  %In1Lo = and i8 %in1, 15
  %In1Hi = lshr i8 %in1, 4
  %m10 = mul i8 %in0, %In1Hi
  %m01 = mul i8 %in1, %In0Hi
  %m00 = mul i8 %In1Lo, %In0Lo
  %addc = add i8 %m01, %m10
  %shl = shl i8 %addc, 4
  %retLo = add i8 %m00, %shl
  ret i8 %retLo
}

; 4 tests that use In0Lo/In1Lo with different commutes
define i16 @mul16_low_A1_B0(i16 %in0, i16 %in1) {
; CHECK-LABEL: @mul16_low_A1_B0(
; CHECK-NEXT:    [[IN0LO:%.*]] = and i16 [[IN0:%.*]], 255
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i16 [[IN0]], 8
; CHECK-NEXT:    [[IN1LO:%.*]] = and i16 [[IN1:%.*]], 255
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i16 [[IN1]], 8
; CHECK-NEXT:    [[M10:%.*]] = mul nuw i16 [[IN0LO]], [[IN1HI]]
; CHECK-NEXT:    call void @use16(i16 [[M10]])
; CHECK-NEXT:    [[M01:%.*]] = mul nuw i16 [[IN1LO]], [[IN0HI]]
; CHECK-NEXT:    call void @use16(i16 [[M01]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i16 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i16 [[RETLO]]
;
  %In0Lo = and i16 %in0, 255
  %In0Hi = lshr i16 %in0, 8
  %In1Lo = and i16 %in1, 255
  %In1Hi = lshr i16 %in1, 8
  %m10 = mul i16 %In0Lo, %In1Hi
  call void @use16(i16 %m10)
  %m01 = mul i16 %In1Lo, %In0Hi
  call void @use16(i16 %m01)
  %m00 = mul i16 %In1Lo, %In0Lo
  %addc = add i16 %m10, %m01
  %shl = shl i16 %addc, 8
  %retLo = add i16 %shl, %m00
  ret i16 %retLo
}

define i16 @mul16_low_A1_B1(i16 %in0, i16 %in1) {
; CHECK-LABEL: @mul16_low_A1_B1(
; CHECK-NEXT:    [[IN0LO:%.*]] = and i16 [[IN0:%.*]], 255
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i16 [[IN0]], 8
; CHECK-NEXT:    [[IN1LO:%.*]] = and i16 [[IN1:%.*]], 255
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i16 [[IN1]], 8
; CHECK-NEXT:    [[M10:%.*]] = mul nuw i16 [[IN0LO]], [[IN1HI]]
; CHECK-NEXT:    call void @use16(i16 [[M10]])
; CHECK-NEXT:    [[M01:%.*]] = mul nuw i16 [[IN0HI]], [[IN1LO]]
; CHECK-NEXT:    call void @use16(i16 [[M01]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i16 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i16 [[RETLO]]
;
  %In0Lo = and i16 %in0, 255
  %In0Hi = lshr i16 %in0, 8
  %In1Lo = and i16 %in1, 255
  %In1Hi = lshr i16 %in1, 8
  %m10 = mul i16 %In0Lo, %In1Hi
  call void @use16(i16 %m10)
  %m01 = mul i16 %In0Hi, %In1Lo
  call void @use16(i16 %m01)
  %m00 = mul i16 %In1Lo, %In0Lo
  %addc = add i16 %m10, %m01
  %shl = shl i16 %addc, 8
  %retLo = add i16 %m00, %shl
  ret i16 %retLo
}

define i16 @mul16_low_A1_B2(i16 %in0, i16 %in1) {
; CHECK-LABEL: @mul16_low_A1_B2(
; CHECK-NEXT:    [[IN0LO:%.*]] = and i16 [[IN0:%.*]], 255
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i16 [[IN0]], 8
; CHECK-NEXT:    [[IN1LO:%.*]] = and i16 [[IN1:%.*]], 255
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i16 [[IN1]], 8
; CHECK-NEXT:    [[M10:%.*]] = mul nuw i16 [[IN1HI]], [[IN0LO]]
; CHECK-NEXT:    call void @use16(i16 [[M10]])
; CHECK-NEXT:    [[M01:%.*]] = mul nuw i16 [[IN1LO]], [[IN0HI]]
; CHECK-NEXT:    call void @use16(i16 [[M01]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i16 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i16 [[RETLO]]
;
  %In0Lo = and i16 %in0, 255
  %In0Hi = lshr i16 %in0, 8
  %In1Lo = and i16 %in1, 255
  %In1Hi = lshr i16 %in1, 8
  %m10 = mul i16 %In1Hi, %In0Lo
  call void @use16(i16 %m10)
  %m01 = mul i16 %In1Lo, %In0Hi
  call void @use16(i16 %m01)
  %m00 = mul i16 %In1Lo, %In0Lo
  %addc = add i16 %m01, %m10
  %shl = shl i16 %addc, 8
  %retLo = add i16 %shl, %m00
  ret i16 %retLo
}

define i16 @mul16_low_A1_B3(i16 %in0, i16 %in1) {
; CHECK-LABEL: @mul16_low_A1_B3(
; CHECK-NEXT:    [[IN0LO:%.*]] = and i16 [[IN0:%.*]], 255
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i16 [[IN0]], 8
; CHECK-NEXT:    [[IN1LO:%.*]] = and i16 [[IN1:%.*]], 255
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i16 [[IN1]], 8
; CHECK-NEXT:    [[M10:%.*]] = mul nuw i16 [[IN0LO]], [[IN1HI]]
; CHECK-NEXT:    call void @use16(i16 [[M10]])
; CHECK-NEXT:    [[M01:%.*]] = mul nuw i16 [[IN1LO]], [[IN0HI]]
; CHECK-NEXT:    call void @use16(i16 [[M01]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i16 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i16 [[RETLO]]
;
  %In0Lo = and i16 %in0, 255
  %In0Hi = lshr i16 %in0, 8
  %In1Lo = and i16 %in1, 255
  %In1Hi = lshr i16 %in1, 8
  %m10 = mul i16 %In0Lo, %In1Hi
  call void @use16(i16 %m10)
  %m01 = mul i16 %In1Lo, %In0Hi
  call void @use16(i16 %m01)
  %m00 = mul i16 %In1Lo, %In0Lo
  %addc = add i16 %m01, %m10
  %shl = shl i16 %addc, 8
  %retLo = add i16 %m00, %shl
  ret i16 %retLo
}

; 4 tests that use In0Lo/in1 with different commutes
define i32 @mul32_low_A2_B0(i32 %in0, i32 %in1) {
; CHECK-LABEL: @mul32_low_A2_B0(
; CHECK-NEXT:    [[IN0LO:%.*]] = and i32 [[IN0:%.*]], 65535
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1:%.*]], 16
; CHECK-NEXT:    [[M10:%.*]] = mul nuw i32 [[IN1HI]], [[IN0LO]]
; CHECK-NEXT:    call void @use32(i32 [[M10]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i32 [[RETLO]]
;
  %In0Lo = and i32 %in0, 65535
  %In0Hi = lshr i32 %in0, 16
  %In1Lo = and i32 %in1, 65535
  %In1Hi = lshr i32 %in1, 16
  %m10 = mul i32 %In1Hi, %In0Lo
  call void @use32(i32 %m10)
  %m01 = mul i32 %In0Hi, %in1
  %m00 = mul i32 %In1Lo, %In0Lo
  %addc = add i32 %m10, %m01
  %shl = shl i32 %addc, 16
  %retLo = add i32 %shl, %m00
  ret i32 %retLo
}

define i32 @mul32_low_A2_B1(i32 %in0, i32 %in1) {
; CHECK-LABEL: @mul32_low_A2_B1(
; CHECK-NEXT:    [[IN0LO:%.*]] = and i32 [[IN0:%.*]], 65535
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1:%.*]], 16
; CHECK-NEXT:    [[M10:%.*]] = mul nuw i32 [[IN1HI]], [[IN0LO]]
; CHECK-NEXT:    call void @use32(i32 [[M10]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i32 [[RETLO]]
;
  %In0Lo = and i32 %in0, 65535
  %In0Hi = lshr i32 %in0, 16
  %In1Lo = and i32 %in1, 65535
  %In1Hi = lshr i32 %in1, 16
  %m10 = mul i32 %In1Hi, %In0Lo
  call void @use32(i32 %m10)
  %m01 = mul i32 %In0Hi, %in1
  %m00 = mul i32 %In1Lo, %In0Lo
  %addc = add i32 %m10, %m01
  %shl = shl i32 %addc, 16
  %retLo = add i32 %m00, %shl
  ret i32 %retLo
}

define i32 @mul32_low_A2_B2(i32 %in0, i32 %p) {
; CHECK-LABEL: @mul32_low_A2_B2(
; CHECK-NEXT:    [[IN1:%.*]] = call i32 @use32(i32 [[P:%.*]])
; CHECK-NEXT:    [[IN0LO:%.*]] = and i32 [[IN0:%.*]], 65535
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1]], 16
; CHECK-NEXT:    [[M10:%.*]] = mul nuw i32 [[IN0LO]], [[IN1HI]]
; CHECK-NEXT:    call void @use32(i32 [[M10]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i32 [[RETLO]]
;
  %in1 = call i32 @use32(i32 %p) ; thwart complexity-based canonicalization
  %In0Lo = and i32 %in0, 65535
  %In0Hi = lshr i32 %in0, 16
  %In1Lo = and i32 %in1, 65535
  %In1Hi = lshr i32 %in1, 16
  %m10 = mul i32 %In0Lo, %In1Hi
  call void @use32(i32 %m10)
  %m01 = mul i32 %in1, %In0Hi
  %m00 = mul i32 %In1Lo, %In0Lo
  %addc = add i32 %m01, %m10
  %shl = shl i32 %addc, 16
  %retLo = add i32 %shl, %m00
  ret i32 %retLo
}

define i32 @mul32_low_A2_B3(i32 %in0, i32 %p) {
; CHECK-LABEL: @mul32_low_A2_B3(
; CHECK-NEXT:    [[IN1:%.*]] = call i32 @use32(i32 [[P:%.*]])
; CHECK-NEXT:    [[IN0LO:%.*]] = and i32 [[IN0:%.*]], 65535
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1]], 16
; CHECK-NEXT:    [[M10:%.*]] = mul nuw i32 [[IN1HI]], [[IN0LO]]
; CHECK-NEXT:    call void @use32(i32 [[M10]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i32 [[RETLO]]
;
  %in1 = call i32 @use32(i32 %p) ; thwart complexity-based canonicalization
  %In0Lo = and i32 %in0, 65535
  %In0Hi = lshr i32 %in0, 16
  %In1Lo = and i32 %in1, 65535
  %In1Hi = lshr i32 %in1, 16
  %m10 = mul i32 %In1Hi, %In0Lo
  call void @use32(i32 %m10)
  %m01 = mul i32 %in1, %In0Hi
  %m00 = mul i32 %In1Lo, %In0Lo
  %addc = add i32 %m01, %m10
  %shl = shl i32 %addc, 16
  %retLo = add i32 %m00, %shl
  ret i32 %retLo
}

; 4 tests that use in0/In1Lo with different commutes
define i64 @mul64_low_A3_B0(i64 %in0, i64 %in1) {
; CHECK-LABEL: @mul64_low_A3_B0(
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i64 [[IN0:%.*]], 32
; CHECK-NEXT:    [[IN1LO:%.*]] = and i64 [[IN1:%.*]], 4294967295
; CHECK-NEXT:    [[M01:%.*]] = mul nuw i64 [[IN0HI]], [[IN1LO]]
; CHECK-NEXT:    call void @use64(i64 [[M01]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i64 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i64 [[RETLO]]
;
  %In0Lo = and i64 %in0, 4294967295
  %In0Hi = lshr i64 %in0, 32
  %In1Lo = and i64 %in1, 4294967295
  %In1Hi = lshr i64 %in1, 32
  %m10 = mul i64 %In1Hi, %in0
  %m01 = mul i64 %In0Hi, %In1Lo
  call void @use64(i64 %m01)
  %m00 = mul i64 %In1Lo, %In0Lo
  %addc = add i64 %m10, %m01
  %shl = shl i64 %addc, 32
  %retLo = add i64 %shl, %m00
  ret i64 %retLo
}

define i64 @mul64_low_A3_B1(i64 %in0, i64 %in1) {
; CHECK-LABEL: @mul64_low_A3_B1(
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i64 [[IN0:%.*]], 32
; CHECK-NEXT:    [[IN1LO:%.*]] = and i64 [[IN1:%.*]], 4294967295
; CHECK-NEXT:    [[M01:%.*]] = mul nuw i64 [[IN0HI]], [[IN1LO]]
; CHECK-NEXT:    call void @use64(i64 [[M01]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i64 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i64 [[RETLO]]
;
  %In0Lo = and i64 %in0, 4294967295
  %In0Hi = lshr i64 %in0, 32
  %In1Lo = and i64 %in1, 4294967295
  %In1Hi = lshr i64 %in1, 32
  %m10 = mul i64 %In1Hi, %in0
  %m01 = mul i64 %In0Hi, %In1Lo
  call void @use64(i64 %m01)
  %m00 = mul i64 %In1Lo, %In0Lo
  %addc = add i64 %m10, %m01
  %shl = shl i64 %addc, 32
  %retLo = add i64 %m00, %shl
  ret i64 %retLo
}

define i64 @mul64_low_A3_B2(i64 %p, i64 %in1) {
; CHECK-LABEL: @mul64_low_A3_B2(
; CHECK-NEXT:    [[IN0:%.*]] = call i64 @use64(i64 [[P:%.*]])
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i64 [[IN0]], 32
; CHECK-NEXT:    [[IN1LO:%.*]] = and i64 [[IN1:%.*]], 4294967295
; CHECK-NEXT:    [[M01:%.*]] = mul nuw i64 [[IN0HI]], [[IN1LO]]
; CHECK-NEXT:    call void @use64(i64 [[M01]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i64 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i64 [[RETLO]]
;
  %in0 = call i64 @use64(i64 %p) ; thwart complexity-based canonicalization
  %In0Lo = and i64 %in0, 4294967295
  %In0Hi = lshr i64 %in0, 32
  %In1Lo = and i64 %in1, 4294967295
  %In1Hi = lshr i64 %in1, 32
  %m10 = mul i64 %in0, %In1Hi
  %m01 = mul i64 %In0Hi, %In1Lo
  call void @use64(i64 %m01)
  %m00 = mul i64 %In1Lo, %In0Lo
  %addc = add i64 %m01, %m10
  %shl = shl i64 %addc, 32
  %retLo = add i64 %shl, %m00
  ret i64 %retLo
}

define i64 @mul64_low_A3_B3(i64 %p, i64 %in1) {
; CHECK-LABEL: @mul64_low_A3_B3(
; CHECK-NEXT:    [[IN0:%.*]] = call i64 @use64(i64 [[P:%.*]])
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i64 [[IN0]], 32
; CHECK-NEXT:    [[IN1LO:%.*]] = and i64 [[IN1:%.*]], 4294967295
; CHECK-NEXT:    [[M01:%.*]] = mul nuw i64 [[IN1LO]], [[IN0HI]]
; CHECK-NEXT:    call void @use64(i64 [[M01]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i64 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i64 [[RETLO]]
;
  %in0 = call i64 @use64(i64 %p) ; thwart complexity-based canonicalization
  %In0Lo = and i64 %in0, 4294967295
  %In0Hi = lshr i64 %in0, 32
  %In1Lo = and i64 %in1, 4294967295
  %In1Hi = lshr i64 %in1, 32
  %m10 = mul i64 %in0, %In1Hi
  %m01 = mul i64 %In1Lo, %In0Hi
  call void @use64(i64 %m01)
  %m00 = mul i64 %In1Lo, %In0Lo
  %addc = add i64 %m01, %m10
  %shl = shl i64 %addc, 32
  %retLo = add i64 %m00, %shl
  ret i64 %retLo
}

define i32 @mul32_low_one_extra_user(i32 %in0, i32 %in1) {
; CHECK-LABEL: @mul32_low_one_extra_user(
; CHECK-NEXT:    [[IN0LO:%.*]] = and i32 [[IN0:%.*]], 65535
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i32 [[IN0]], 16
; CHECK-NEXT:    [[IN1LO:%.*]] = and i32 [[IN1:%.*]], 65535
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1]], 16
; CHECK-NEXT:    [[M10:%.*]] = mul nuw i32 [[IN1HI]], [[IN0LO]]
; CHECK-NEXT:    [[M01:%.*]] = mul nuw i32 [[IN1LO]], [[IN0HI]]
; CHECK-NEXT:    [[ADDC:%.*]] = add i32 [[M10]], [[M01]]
; CHECK-NEXT:    call void @use32(i32 [[ADDC]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i32 [[RETLO]]
;
  %In0Lo = and i32 %in0, 65535
  %In0Hi = lshr i32 %in0, 16
  %In1Lo = and i32 %in1, 65535
  %In1Hi = lshr i32 %in1, 16
  %m10 = mul i32 %In1Hi, %In0Lo
  %m01 = mul i32 %In1Lo, %In0Hi
  %m00 = mul i32 %In1Lo, %In0Lo
  %addc = add i32 %m10, %m01
  call void @use32(i32 %addc)
  %shl = shl i32 %addc, 16
  %retLo = add i32 %shl, %m00
  ret i32 %retLo
}

; The following are variety types of target cases
; https://alive2.llvm.org/ce/z/2BqKLt
define i8 @mul8_low(i8 %in0, i8 %in1) {
; CHECK-LABEL: @mul8_low(
; CHECK-NEXT:    [[RETLO:%.*]] = mul i8 [[IN0:%.*]], [[IN1:%.*]]
; CHECK-NEXT:    ret i8 [[RETLO]]
;
  %In0Lo = and i8 %in0, 15
  %In0Hi = lshr i8 %in0, 4
  %In1Lo = and i8 %in1, 15
  %In1Hi = lshr i8 %in1, 4
  %m10 = mul i8 %In1Hi, %In0Lo
  %m01 = mul i8 %In1Lo, %In0Hi
  %m00 = mul i8 %In1Lo, %In0Lo
  %addc = add i8 %m10, %m01
  %shl = shl i8 %addc, 4
  %retLo = add i8 %shl, %m00
  ret i8 %retLo
}

define i16 @mul16_low(i16 %in0, i16 %in1) {
; CHECK-LABEL: @mul16_low(
; CHECK-NEXT:    [[RETLO:%.*]] = mul i16 [[IN0:%.*]], [[IN1:%.*]]
; CHECK-NEXT:    ret i16 [[RETLO]]
;
  %In0Lo = and i16 %in0, 255
  %In0Hi = lshr i16 %in0, 8
  %In1Lo = and i16 %in1, 255
  %In1Hi = lshr i16 %in1, 8
  %m10 = mul i16 %In1Hi, %In0Lo
  %m01 = mul i16 %In1Lo, %In0Hi
  %m00 = mul i16 %In1Lo, %In0Lo
  %addc = add i16 %m10, %m01
  %shl = shl i16 %addc, 8
  %retLo = add i16 %shl, %m00
  ret i16 %retLo
}

define i32 @mul32_low(i32 %in0, i32 %in1) {
; CHECK-LABEL: @mul32_low(
; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0:%.*]], [[IN1:%.*]]
; CHECK-NEXT:    ret i32 [[RETLO]]
;
  %In0Lo = and i32 %in0, 65535
  %In0Hi = lshr i32 %in0, 16
  %In1Lo = and i32 %in1, 65535
  %In1Hi = lshr i32 %in1, 16
  %m10 = mul i32 %In1Hi, %In0Lo
  %m01 = mul i32 %In1Lo, %In0Hi
  %m00 = mul i32 %In1Lo, %In0Lo
  %addc = add i32 %m10, %m01
  %shl = shl i32 %addc, 16
  %retLo = add i32 %shl, %m00
  ret i32 %retLo
}

define i64 @mul64_low(i64 %in0, i64 %in1) {
; CHECK-LABEL: @mul64_low(
; CHECK-NEXT:    [[RETLO:%.*]] = mul i64 [[IN0:%.*]], [[IN1:%.*]]
; CHECK-NEXT:    ret i64 [[RETLO]]
;
  %In0Lo = and i64 %in0, 4294967295
  %In0Hi = lshr i64 %in0, 32
  %In1Lo = and i64 %in1, 4294967295
  %In1Hi = lshr i64 %in1, 32
  %m10 = mul i64 %In1Hi, %In0Lo
  %m01 = mul i64 %In1Lo, %In0Hi
  %m00 = mul i64 %In1Lo, %In0Lo
  %addc = add i64 %m10, %m01
  %shl = shl i64 %addc, 32
  %retLo = add i64 %shl, %m00
  ret i64 %retLo
}

define i128 @mul128_low(i128 %in0, i128 %in1) {
; CHECK-LABEL: @mul128_low(
; CHECK-NEXT:    [[RETLO:%.*]] = mul i128 [[IN0:%.*]], [[IN1:%.*]]
; CHECK-NEXT:    ret i128 [[RETLO]]
;
  %In0Lo = and i128 %in0, 18446744073709551615
  %In0Hi = lshr i128 %in0, 64
  %In1Lo = and i128 %in1, 18446744073709551615
  %In1Hi = lshr i128 %in1, 64
  %m10 = mul i128 %In1Hi, %In0Lo
  %m01 = mul i128 %In1Lo, %In0Hi
  %m00 = mul i128 %In1Lo, %In0Lo
  %addc = add i128 %m10, %m01
  %shl = shl i128 %addc, 64
  %retLo = add i128 %shl, %m00
  ret i128 %retLo
}

; Support vector type
define <2 x i8> @mul_v2i8_low(<2 x i8> %in0, <2 x i8> %in1) {
; CHECK-LABEL: @mul_v2i8_low(
; CHECK-NEXT:    [[RETLO:%.*]] = mul <2 x i8> [[IN0:%.*]], [[IN1:%.*]]
; CHECK-NEXT:    ret <2 x i8> [[RETLO]]
;
  %In0Lo = and <2 x i8> %in0, <i8 15, i8 15>
  %In0Hi = lshr <2 x i8> %in0, <i8 4, i8 4>
  %In1Lo = and <2 x i8> %in1, <i8 15, i8 15>
  %In1Hi = lshr <2 x i8> %in1, <i8 4, i8 4>
  %m10 = mul <2 x i8> %In1Hi, %In0Lo
  %m01 = mul <2 x i8> %In1Lo, %In0Hi
  %m00 = mul <2 x i8> %In1Lo, %In0Lo
  %addc = add <2 x i8> %m10, %m01
  %shl = shl <2 x i8> %addc, <i8 4, i8 4>
  %retLo = add <2 x i8> %shl, %m00
  ret <2 x i8> %retLo
}

define <2 x i8> @mul_v2i8_low_one_extra_user(<2 x i8> %in0, <2 x i8> %in1) {
; CHECK-LABEL: @mul_v2i8_low_one_extra_user(
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr <2 x i8> [[IN0:%.*]], splat (i8 4)
; CHECK-NEXT:    [[IN1LO:%.*]] = and <2 x i8> [[IN1:%.*]], splat (i8 15)
; CHECK-NEXT:    [[M01:%.*]] = mul nuw <2 x i8> [[IN1LO]], [[IN0HI]]
; CHECK-NEXT:    call void @use_v2i8(<2 x i8> [[M01]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul <2 x i8> [[IN0]], [[IN1]]
; CHECK-NEXT:    ret <2 x i8> [[RETLO]]
;
  %In0Lo = and <2 x i8> %in0, <i8 15, i8 15>
  %In0Hi = lshr <2 x i8> %in0, <i8 4, i8 4>
  %In1Lo = and <2 x i8> %in1, <i8 15, i8 15>
  %In1Hi = lshr <2 x i8> %in1, <i8 4, i8 4>
  %m10 = mul <2 x i8> %In1Hi, %In0Lo
  %m01 = mul <2 x i8> %In1Lo, %In0Hi
  call void @use_v2i8(<2 x i8> %m01)
  %m00 = mul <2 x i8> %In1Lo, %In0Lo
  %addc = add <2 x i8> %m10, %m01
  %shl = shl <2 x i8> %addc, <i8 4, i8 4>
  %retLo = add <2 x i8> %shl, %m00
  ret <2 x i8> %retLo
}

; Support wide width
define i130 @mul130_low(i130 %in0, i130 %in1) {
; CHECK-LABEL: @mul130_low(
; CHECK-NEXT:    [[RETLO:%.*]] = mul i130 [[IN0:%.*]], [[IN1:%.*]]
; CHECK-NEXT:    ret i130 [[RETLO]]
;
  %In0Lo = and i130 %in0, 36893488147419103231
  %In0Hi = lshr i130 %in0, 65
  %In1Lo = and i130 %in1, 36893488147419103231
  %In1Hi = lshr i130 %in1, 65
  %m10 = mul i130 %In1Hi, %In0Lo
  %m01 = mul i130 %In1Lo, %In0Hi
  %m00 = mul i130 %In1Lo, %In0Lo
  %addc = add i130 %m10, %m01
  %shl = shl i130 %addc, 65
  %retLo = add i130 %shl, %m00
  ret i130 %retLo
}

define i130 @mul130_low_one_extra_user(i130 %in0, i130 %in1) {
; CHECK-LABEL: @mul130_low_one_extra_user(
; CHECK-NEXT:    [[IN0LO:%.*]] = and i130 [[IN0:%.*]], 36893488147419103231
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i130 [[IN1:%.*]], 65
; CHECK-NEXT:    [[M10:%.*]] = mul nuw i130 [[IN1HI]], [[IN0LO]]
; CHECK-NEXT:    call void @use130(i130 [[M10]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i130 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i130 [[RETLO]]
;
  %In0Lo = and i130 %in0, 36893488147419103231
  %In0Hi = lshr i130 %in0, 65
  %In1Lo = and i130 %in1, 36893488147419103231
  %In1Hi = lshr i130 %in1, 65
  %m10 = mul i130 %In1Hi, %In0Lo
  call void @use130(i130 %m10)
  %m01 = mul i130 %In1Lo, %In0Hi
  %m00 = mul i130 %In1Lo, %In0Lo
  %addc = add i130 %m10, %m01
  %shl = shl i130 %addc, 65
  %retLo = add i130 %shl, %m00
  ret i130 %retLo
}

; Negative case: Skip odd bitwidth type
define i9 @mul9_low(i9 %in0, i9 %in1) {
; CHECK-LABEL: @mul9_low(
; CHECK-NEXT:    [[IN0LO:%.*]] = and i9 [[IN0:%.*]], 15
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i9 [[IN0]], 4
; CHECK-NEXT:    [[IN1LO:%.*]] = and i9 [[IN1:%.*]], 15
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i9 [[IN1]], 4
; CHECK-NEXT:    [[M10:%.*]] = mul nuw i9 [[IN1HI]], [[IN0LO]]
; CHECK-NEXT:    [[M01:%.*]] = mul nuw i9 [[IN1LO]], [[IN0HI]]
; CHECK-NEXT:    [[M00:%.*]] = mul nuw nsw i9 [[IN1LO]], [[IN0LO]]
; CHECK-NEXT:    [[ADDC:%.*]] = add i9 [[M10]], [[M01]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i9 [[ADDC]], 4
; CHECK-NEXT:    [[RETLO:%.*]] = add i9 [[SHL]], [[M00]]
; CHECK-NEXT:    ret i9 [[RETLO]]
;
  %In0Lo = and i9 %in0, 15
  %In0Hi = lshr i9 %in0, 4
  %In1Lo = and i9 %in1, 15
  %In1Hi = lshr i9 %in1, 4
  %m10 = mul i9 %In1Hi, %In0Lo
  %m01 = mul i9 %In1Lo, %In0Hi
  %m00 = mul i9 %In1Lo, %In0Lo
  %addc = add i9 %m10, %m01
  %shl = shl i9 %addc, 4
  %retLo = add i9 %shl, %m00
  ret i9 %retLo
}

; Negative test: Should not remove the "and", https://alive2.llvm.org/ce/z/JLmNU5
define i64 @mul64_low_no_and(i64 %in0, i64 %in1) {
; CHECK-LABEL: @mul64_low_no_and(
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i64 [[IN0:%.*]], 32
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i64 [[IN1:%.*]], 32
; CHECK-NEXT:    [[M10:%.*]] = mul i64 [[IN1HI]], [[IN0]]
; CHECK-NEXT:    [[M01:%.*]] = mul i64 [[IN1]], [[IN0HI]]
; CHECK-NEXT:    [[M00:%.*]] = mul i64 [[IN1]], [[IN0]]
; CHECK-NEXT:    [[ADDC:%.*]] = add i64 [[M10]], [[M01]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[ADDC]], 32
; CHECK-NEXT:    [[RETLO:%.*]] = add i64 [[SHL]], [[M00]]
; CHECK-NEXT:    ret i64 [[RETLO]]
;
  %In0Hi = lshr i64 %in0, 32
  %In1Hi = lshr i64 %in1, 32
  %m10 = mul i64 %In1Hi, %in0
  %m01 = mul i64 %in1, %In0Hi
  %m00 = mul i64 %in1, %in0
  %addc = add i64 %m10, %m01
  %shl = shl i64 %addc, 32
  %retLo = add i64 %shl, %m00
  ret i64 %retLo
}

; Negative test: Mismatched shift amount
define i16 @mul16_low_miss_shift_amount(i16 %in0, i16 %in1) {
; CHECK-LABEL: @mul16_low_miss_shift_amount(
; CHECK-NEXT:    [[IN0LO:%.*]] = and i16 [[IN0:%.*]], 127
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i16 [[IN0]], 8
; CHECK-NEXT:    [[IN1LO:%.*]] = and i16 [[IN1:%.*]], 127
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i16 [[IN1]], 8
; CHECK-NEXT:    [[M10:%.*]] = mul nuw nsw i16 [[IN1HI]], [[IN0LO]]
; CHECK-NEXT:    [[M01:%.*]] = mul nuw nsw i16 [[IN1LO]], [[IN0HI]]
; CHECK-NEXT:    [[M00:%.*]] = mul nuw nsw i16 [[IN1LO]], [[IN0LO]]
; CHECK-NEXT:    [[ADDC:%.*]] = add nuw i16 [[M10]], [[M01]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i16 [[ADDC]], 8
; CHECK-NEXT:    [[RETLO:%.*]] = add i16 [[SHL]], [[M00]]
; CHECK-NEXT:    ret i16 [[RETLO]]
;
  %In0Lo = and i16 %in0, 127 ; Should be 255
  %In0Hi = lshr i16 %in0, 8
  %In1Lo = and i16 %in1, 127
  %In1Hi = lshr i16 %in1, 8
  %m10 = mul i16 %In1Hi, %In0Lo
  %m01 = mul i16 %In1Lo, %In0Hi
  %m00 = mul i16 %In1Lo, %In0Lo
  %addc = add i16 %m10, %m01
  %shl = shl i16 %addc, 8
  %retLo = add i16 %shl, %m00
  ret i16 %retLo
}

; Negative test: Mismatched half width
define i8 @mul8_low_miss_half_width(i8 %in0, i8 %in1) {
; CHECK-LABEL: @mul8_low_miss_half_width(
; CHECK-NEXT:    [[IN0LO:%.*]] = and i8 [[IN0:%.*]], 15
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i8 [[IN0]], 3
; CHECK-NEXT:    [[IN1LO:%.*]] = and i8 [[IN1:%.*]], 15
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i8 [[IN1]], 3
; CHECK-NEXT:    [[M10:%.*]] = mul i8 [[IN1HI]], [[IN0LO]]
; CHECK-NEXT:    [[M01:%.*]] = mul i8 [[IN1LO]], [[IN0HI]]
; CHECK-NEXT:    [[M00:%.*]] = mul nuw i8 [[IN1LO]], [[IN0LO]]
; CHECK-NEXT:    [[ADDC:%.*]] = add i8 [[M10]], [[M01]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[ADDC]], 3
; CHECK-NEXT:    [[RETLO:%.*]] = add i8 [[SHL]], [[M00]]
; CHECK-NEXT:    ret i8 [[RETLO]]
;
  %In0Lo = and i8 %in0, 15
  %In0Hi = lshr i8 %in0, 3 ; Should be 4
  %In1Lo = and i8 %in1, 15
  %In1Hi = lshr i8 %in1, 3
  %m10 = mul i8 %In1Hi, %In0Lo
  %m01 = mul i8 %In1Lo, %In0Hi
  %m00 = mul i8 %In1Lo, %In0Lo
  %addc = add i8 %m10, %m01
  %shl = shl i8 %addc, 3
  %retLo = add i8 %shl, %m00
  ret i8 %retLo
}

; Test case to show shl doesn't need hasOneUse constraint
define i32 @mul32_low_extra_shl_use(i32 %in0, i32 %in1) {
; CHECK-LABEL: @mul32_low_extra_shl_use(
; CHECK-NEXT:    [[IN0HI:%.*]] = lshr i32 [[IN0:%.*]], 16
; CHECK-NEXT:    [[IN1HI:%.*]] = lshr i32 [[IN1:%.*]], 16
; CHECK-NEXT:    [[M10:%.*]] = mul i32 [[IN1HI]], [[IN0]]
; CHECK-NEXT:    [[M01:%.*]] = mul i32 [[IN1]], [[IN0HI]]
; CHECK-NEXT:    [[ADDC:%.*]] = add i32 [[M10]], [[M01]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[ADDC]], 16
; CHECK-NEXT:    call void @use32(i32 [[SHL]])
; CHECK-NEXT:    [[RETLO:%.*]] = mul i32 [[IN0]], [[IN1]]
; CHECK-NEXT:    ret i32 [[RETLO]]
;
  %In0Lo = and i32 %in0, 65535
  %In0Hi = lshr i32 %in0, 16
  %In1Lo = and i32 %in1, 65535
  %In1Hi = lshr i32 %in1, 16
  %m10 = mul i32 %In1Hi, %In0Lo
  %m01 = mul i32 %In1Lo, %In0Hi
  %m00 = mul i32 %In1Lo, %In0Lo
  %addc = add i32 %m10, %m01
  %shl = shl i32 %addc, 16
  call void @use32(i32 %shl)
  %retLo = add i32 %shl, %m00
  ret i32 %retLo
}