; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

declare void @use8(i8)
declare void @use32(i32)

; These would crash if we didn't check for a negative shift.

; https://llvm.org/bugs/show_bug.cgi?id=12967

define void @pr12967() {
; CHECK-LABEL: @pr12967(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    br label [[LOOP]]
;
entry:
  br label %loop

loop:
  %c = phi i32 [ %shl, %loop ], [ undef, %entry ]
  %shr = shl i32 %c, 7
  %shl = lshr i32 %shr, -2
  br label %loop
}

; https://llvm.org/bugs/show_bug.cgi?id=26760

define void @pr26760() {
; CHECK-LABEL: @pr26760(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    br label [[LOOP]]
;
entry:
  br label %loop

loop:
  %c = phi i32 [ %shl, %loop ], [ undef, %entry ]
  %shr = lshr i32 %c, 7
  %shl = shl i32 %shr, -2
  br label %loop
}

; Converting the 2 shifts to SHL 6 without the AND is wrong.
; https://llvm.org/bugs/show_bug.cgi?id=8547

define i32 @pr8547(ptr %g) {
; CHECK-LABEL: @pr8547(
; CHECK-NEXT:  codeRepl:
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[STOREMERGE:%.*]] = phi i32 [ 0, [[CODEREPL:%.*]] ], [ 5, [[FOR_COND]] ]
; CHECK-NEXT:    store i32 [[STOREMERGE]], ptr [[G:%.*]], align 4
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i32 [[STOREMERGE]], 6
; CHECK-NEXT:    [[CONV2:%.*]] = and i32 [[TMP0]], 64
; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[CONV2]], 0
; CHECK-NEXT:    br i1 [[TOBOOL]], label [[FOR_COND]], label [[CODEREPL2:%.*]]
; CHECK:       codeRepl2:
; CHECK-NEXT:    ret i32 [[CONV2]]
;
codeRepl:
  br label %for.cond

for.cond:
  %storemerge = phi i32 [ 0, %codeRepl ], [ 5, %for.cond ]
  store i32 %storemerge, ptr %g, align 4
  %shl = shl i32 %storemerge, 30
  %conv2 = lshr i32 %shl, 24
  %tobool = icmp eq i32 %conv2, 0
  br i1 %tobool, label %for.cond, label %codeRepl2

codeRepl2:
  ret i32 %conv2
}

; Two same-direction shifts that add up to the bitwidth or more should get
; folded to zero.
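; For example, in @shl_shl below, (%A << 6) << 28 would move every bit of %A
; up by 34 positions, past the top of an i32, so the result is always 0.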

define i32 @shl_shl(i32 %A) {
; CHECK-LABEL: @shl_shl(
; CHECK-NEXT:    ret i32 0
;
  %B = shl i32 %A, 6
  %C = shl i32 %B, 28
  ret i32 %C
}

define <2 x i33> @shl_shl_splat_vec(<2 x i33> %A) {
; CHECK-LABEL: @shl_shl_splat_vec(
; CHECK-NEXT:    ret <2 x i33> zeroinitializer
;
  %B = shl <2 x i33> %A, <i33 5, i33 5>
  %C = shl <2 x i33> %B, <i33 28, i33 28>
  ret <2 x i33> %C
}

; FIXME: the non-splat shift amounts also sum to the bitwidth (6+27 = 5+28 = 33)
; in each lane, so this should fold to zero as well.

define <2 x i33> @shl_shl_vec(<2 x i33> %A) {
; CHECK-LABEL: @shl_shl_vec(
; CHECK-NEXT:    [[B:%.*]] = shl <2 x i33> [[A:%.*]], <i33 6, i33 5>
; CHECK-NEXT:    [[C:%.*]] = shl <2 x i33> [[B]], <i33 27, i33 28>
; CHECK-NEXT:    ret <2 x i33> [[C]]
;
  %B = shl <2 x i33> %A, <i33 6, i33 5>
  %C = shl <2 x i33> %B, <i33 27, i33 28>
  ret <2 x i33> %C
}

define i232 @lshr_lshr(i232 %A) {
; CHECK-LABEL: @lshr_lshr(
; CHECK-NEXT:    ret i232 0
;
  %B = lshr i232 %A, 231
  %C = lshr i232 %B, 1
  ret i232 %C
}

define <2 x i32> @lshr_lshr_splat_vec(<2 x i32> %A) {
; CHECK-LABEL: @lshr_lshr_splat_vec(
; CHECK-NEXT:    ret <2 x i32> zeroinitializer
;
  %B = lshr <2 x i32> %A, <i32 28, i32 28>
  %C = lshr <2 x i32> %B, <i32 4, i32 4>
  ret <2 x i32> %C
}

define <2 x i32> @lshr_lshr_vec(<2 x i32> %A) {
; CHECK-LABEL: @lshr_lshr_vec(
; CHECK-NEXT:    ret <2 x i32> zeroinitializer
;
  %B = lshr <2 x i32> %A, <i32 29, i32 28>
  %C = lshr <2 x i32> %B, <i32 4, i32 5>
  ret <2 x i32> %C
}
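
; A shl of a truncated right-shift folds to one shift by the difference of the
; amounts plus a mask. E.g. in @shl_trunc_bigger_lshr below:
; ((%x u>> 5) truncated to i8) << 3 == (trunc (%x u>> 2) to i8) & -8.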

define i8 @shl_trunc_bigger_lshr(i32 %x) {
; CHECK-LABEL: @shl_trunc_bigger_lshr(
; CHECK-NEXT:    [[SH_DIFF:%.*]] = lshr i32 [[X:%.*]], 2
; CHECK-NEXT:    [[TR_SH_DIFF:%.*]] = trunc i32 [[SH_DIFF]] to i8
; CHECK-NEXT:    [[LT:%.*]] = and i8 [[TR_SH_DIFF]], -8
; CHECK-NEXT:    ret i8 [[LT]]
;
  %rt = lshr i32 %x, 5
  %tr = trunc i32 %rt to i8
  %lt = shl i8 %tr, 3
  ret i8 %lt
}

define i8 @shl_trunc_smaller_lshr(i32 %x) {
; CHECK-LABEL: @shl_trunc_smaller_lshr(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
; CHECK-NEXT:    [[TMP2:%.*]] = shl i8 [[TMP1]], 2
; CHECK-NEXT:    [[LT:%.*]] = and i8 [[TMP2]], -32
; CHECK-NEXT:    ret i8 [[LT]]
;
  %rt = lshr i32 %x, 3
  %tr = trunc i32 %rt to i8
  %lt = shl i8 %tr, 5
  ret i8 %lt
}

define i24 @shl_trunc_bigger_ashr(i32 %x) {
; CHECK-LABEL: @shl_trunc_bigger_ashr(
; CHECK-NEXT:    [[SH_DIFF:%.*]] = ashr i32 [[X:%.*]], 9
; CHECK-NEXT:    [[TR_SH_DIFF:%.*]] = trunc nsw i32 [[SH_DIFF]] to i24
; CHECK-NEXT:    [[LT:%.*]] = and i24 [[TR_SH_DIFF]], -8
; CHECK-NEXT:    ret i24 [[LT]]
;
  %rt = ashr i32 %x, 12
  %tr = trunc i32 %rt to i24
  %lt = shl i24 %tr, 3
  ret i24 %lt
}

define i24 @shl_trunc_smaller_ashr(i32 %x) {
; CHECK-LABEL: @shl_trunc_smaller_ashr(
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i24
; CHECK-NEXT:    [[TMP2:%.*]] = shl i24 [[TMP1]], 3
; CHECK-NEXT:    [[LT:%.*]] = and i24 [[TMP2]], -8192
; CHECK-NEXT:    ret i24 [[LT]]
;
  %rt = ashr i32 %x, 10
  %tr = trunc i32 %rt to i24
  %lt = shl i24 %tr, 13
  ret i24 %lt
}

define i8 @shl_trunc_bigger_shl(i32 %x) {
; CHECK-LABEL: @shl_trunc_bigger_shl(
; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i8
; CHECK-NEXT:    [[TR:%.*]] = shl i8 [[X_TR]], 6
; CHECK-NEXT:    ret i8 [[TR]]
;
  %rt = shl i32 %x, 4
  %tr = trunc i32 %rt to i8
  %lt = shl i8 %tr, 2
  ret i8 %lt
}

define i8 @shl_trunc_smaller_shl(i32 %x) {
; CHECK-LABEL: @shl_trunc_smaller_shl(
; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i8
; CHECK-NEXT:    [[TR:%.*]] = shl i8 [[X_TR]], 6
; CHECK-NEXT:    ret i8 [[TR]]
;
  %rt = shl i32 %x, 2
  %tr = trunc i32 %rt to i8
  %lt = shl i8 %tr, 4
  ret i8 %lt
}

define i8 @shl_trunc_bigger_lshr_use1(i32 %x) {
; CHECK-LABEL: @shl_trunc_bigger_lshr_use1(
; CHECK-NEXT:    [[RT:%.*]] = lshr i32 [[X:%.*]], 5
; CHECK-NEXT:    call void @use32(i32 [[RT]])
; CHECK-NEXT:    [[TR:%.*]] = trunc i32 [[RT]] to i8
; CHECK-NEXT:    [[LT:%.*]] = shl i8 [[TR]], 3
; CHECK-NEXT:    ret i8 [[LT]]
;
  %rt = lshr i32 %x, 5
  call void @use32(i32 %rt)
  %tr = trunc i32 %rt to i8
  %lt = shl i8 %tr, 3
  ret i8 %lt
}

define i8 @shl_trunc_smaller_lshr_use1(i32 %x) {
; CHECK-LABEL: @shl_trunc_smaller_lshr_use1(
; CHECK-NEXT:    [[RT:%.*]] = lshr i32 [[X:%.*]], 3
; CHECK-NEXT:    call void @use32(i32 [[RT]])
; CHECK-NEXT:    [[TR:%.*]] = trunc i32 [[RT]] to i8
; CHECK-NEXT:    [[LT:%.*]] = shl i8 [[TR]], 5
; CHECK-NEXT:    ret i8 [[LT]]
;
  %rt = lshr i32 %x, 3
  call void @use32(i32 %rt)
  %tr = trunc i32 %rt to i8
  %lt = shl i8 %tr, 5
  ret i8 %lt
}

define i8 @shl_trunc_bigger_lshr_use2(i32 %x) {
; CHECK-LABEL: @shl_trunc_bigger_lshr_use2(
; CHECK-NEXT:    [[RT:%.*]] = lshr i32 [[X:%.*]], 5
; CHECK-NEXT:    [[TR:%.*]] = trunc i32 [[RT]] to i8
; CHECK-NEXT:    call void @use8(i8 [[TR]])
; CHECK-NEXT:    [[LT:%.*]] = shl i8 [[TR]], 3
; CHECK-NEXT:    ret i8 [[LT]]
;
  %rt = lshr i32 %x, 5
  %tr = trunc i32 %rt to i8
  call void @use8(i8 %tr)
  %lt = shl i8 %tr, 3
  ret i8 %lt
}

define i8 @shl_trunc_smaller_lshr_use2(i32 %x) {
; CHECK-LABEL: @shl_trunc_smaller_lshr_use2(
; CHECK-NEXT:    [[RT:%.*]] = lshr i32 [[X:%.*]], 3
; CHECK-NEXT:    [[TR:%.*]] = trunc i32 [[RT]] to i8
; CHECK-NEXT:    call void @use8(i8 [[TR]])
; CHECK-NEXT:    [[LT:%.*]] = shl i8 [[TR]], 5
; CHECK-NEXT:    ret i8 [[LT]]
;
  %rt = lshr i32 %x, 3
  %tr = trunc i32 %rt to i8
  call void @use8(i8 %tr)
  %lt = shl i8 %tr, 5
  ret i8 %lt
}

define i32 @ashr_ashr_constants_use(i32 %x) {
; CHECK-LABEL: @ashr_ashr_constants_use(
; CHECK-NEXT:    [[S:%.*]] = ashr i32 -33, [[X:%.*]]
; CHECK-NEXT:    call void @use32(i32 [[S]])
; CHECK-NEXT:    [[R:%.*]] = ashr i32 -5, [[X]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = ashr i32 -33, %x
  call void @use32(i32 %s)
  %r = ashr i32 %s, 3
  ret i32 %r
}

define <3 x i8> @ashr_ashr_constants_vec(<3 x i8> %x) {
; CHECK-LABEL: @ashr_ashr_constants_vec(
; CHECK-NEXT:    [[R:%.*]] = ashr <3 x i8> <i8 4, i8 poison, i8 -1>, [[X:%.*]]
; CHECK-NEXT:    ret <3 x i8> [[R]]
;
  %s = ashr <3 x i8> <i8 33, i8 -2, i8 -128>, %x
  %r = ashr <3 x i8> %s, <i8 3, i8 -1, i8 7>
  ret <3 x i8> %r
}

define i32 @lshr_lshr_constants_use(i32 %x) {
; CHECK-LABEL: @lshr_lshr_constants_use(
; CHECK-NEXT:    [[S:%.*]] = lshr i32 -33, [[X:%.*]]
; CHECK-NEXT:    call void @use32(i32 [[S]])
; CHECK-NEXT:    [[R:%.*]] = lshr i32 536870907, [[X]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = lshr i32 -33, %x
  call void @use32(i32 %s)
  %r = lshr i32 %s, 3
  ret i32 %r
}

define <3 x i8> @lshr_lshr_constants_vec(<3 x i8> %x) {
; CHECK-LABEL: @lshr_lshr_constants_vec(
; CHECK-NEXT:    [[R:%.*]] = lshr <3 x i8> <i8 4, i8 poison, i8 0>, [[X:%.*]]
; CHECK-NEXT:    ret <3 x i8> [[R]]
;
  %s = lshr <3 x i8> <i8 33, i8 -2, i8 1>, %x
  %r = lshr <3 x i8> %s, <i8 3, i8 -1, i8 7>
  ret <3 x i8> %r
}
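
; When the shifted operand is a constant and both shifts go the same direction,
; the shift amounts commute: (C >> %x) >> 3 == (C >> 3) >> %x. That is why
; @ashr_ashr_constants_use above produces ashr i32 -5 (-33 a>> 3 == -5), and
; @shl_shl_constants_use below produces 0x4000_0000 (0x8800_0000 << 3 mod 2^32).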

define i32 @shl_shl_constants_use(i32 %x) {
; CHECK-LABEL: @shl_shl_constants_use(
; CHECK-NEXT:    [[S:%.*]] = shl i32 -2013265920, [[X:%.*]]
; CHECK-NEXT:    call void @use32(i32 [[S]])
; CHECK-NEXT:    [[R:%.*]] = shl i32 1073741824, [[X]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = shl i32 2281701376, %x ; 0x8800_0000
  call void @use32(i32 %s)
  %r = shl i32 %s, 3
  ret i32 %r
}

define <3 x i8> @shl_shl_constants_vec(<3 x i8> %x) {
; CHECK-LABEL: @shl_shl_constants_vec(
; CHECK-NEXT:    [[R:%.*]] = shl <3 x i8> <i8 8, i8 poison, i8 0>, [[X:%.*]]
; CHECK-NEXT:    ret <3 x i8> [[R]]
;
  %s = shl <3 x i8> <i8 33, i8 -2, i8 -128>, %x
  %r = shl <3 x i8> %s, <i8 3, i8 -1, i8 7>
  ret <3 x i8> %r
}

; PR9809
define i32 @shl_shl_constants_div(i32 %a, i32 %b) {
; CHECK-LABEL: @shl_shl_constants_div(
; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[B:%.*]], 2
; CHECK-NEXT:    [[DIV1:%.*]] = lshr i32 [[A:%.*]], [[TMP1]]
; CHECK-NEXT:    ret i32 [[DIV1]]
;
  %shl1 = shl i32 1, %b
  %shl2 = shl i32 %shl1, 2
  %div = udiv i32 %a, %shl2
  ret i32 %div
}

define i32 @ashr_lshr_constants(i32 %x) {
; CHECK-LABEL: @ashr_lshr_constants(
; CHECK-NEXT:    [[S:%.*]] = ashr i32 -33, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[S]], 3
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = ashr i32 -33, %x
  %r = lshr i32 %s, 3
  ret i32 %r
}

define i32 @ashr_shl_constants(i32 %x) {
; CHECK-LABEL: @ashr_shl_constants(
; CHECK-NEXT:    [[S:%.*]] = ashr i32 -33, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = shl nsw i32 [[S]], 3
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = ashr i32 -33, %x
  %r = shl i32 %s, 3
  ret i32 %r
}

define i32 @lshr_ashr_constants(i32 %x) {
; CHECK-LABEL: @lshr_ashr_constants(
; CHECK-NEXT:    [[S:%.*]] = lshr i32 -33, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[S]], 3
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = lshr i32 -33, %x
  %r = ashr i32 %s, 3
  ret i32 %r
}

define i32 @lshr_shl_constants(i32 %x) {
; CHECK-LABEL: @lshr_shl_constants(
; CHECK-NEXT:    [[S:%.*]] = lshr i32 -33, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = shl i32 [[S]], 3
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = lshr i32 -33, %x
  %r = shl i32 %s, 3
  ret i32 %r
}

define i32 @shl_ashr_constants(i32 %x) {
; CHECK-LABEL: @shl_ashr_constants(
; CHECK-NEXT:    [[S:%.*]] = shl i32 -33, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[S]], 3
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = shl i32 -33, %x
  %r = ashr i32 %s, 3
  ret i32 %r
}

define i32 @shl_lshr_constants(i32 %x) {
; CHECK-LABEL: @shl_lshr_constants(
; CHECK-NEXT:    [[S:%.*]] = shl i32 -33, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[S]], 3
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = shl i32 -33, %x
  %r = lshr i32 %s, 3
  ret i32 %r
}

; Pre-shift a constant to eliminate lshr.

define i8 @shl_lshr_demand1(i8 %x) {
; CHECK-LABEL: @shl_lshr_demand1(
; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 5, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP1]], -32
; CHECK-NEXT:    ret i8 [[R]]
;
  %shl = shl i8 40, %x ; 0b0010_1000
  %lshr = lshr i8 %shl, 3
  %r = or i8 %lshr, 224 ; 0b1110_0000
  ret i8 %r
}
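
; In @shl_lshr_demand1 above: 40 == 5 << 3, so (40 << %x) u>> 3 == 5 << %x in
; the low 5 bits, and the 'or' with -32 (0b1110_0000) overwrites the top 3
; bits, so only those low 5 bits are demanded.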

; Pre-shift a constant to eliminate disguised lshr.

define i8 @shl_ashr_demand2(i8 %x) {
; CHECK-LABEL: @shl_ashr_demand2(
; CHECK-NEXT:    [[SHL:%.*]] = shl i8 40, [[X:%.*]]
; CHECK-NEXT:    call void @use8(i8 [[SHL]])
; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 5, [[X]]
; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP1]], -32
; CHECK-NEXT:    ret i8 [[R]]
;
  %shl = shl i8 40, %x ; 0b0010_1000
  call void @use8(i8 %shl)
  %lshr = ashr i8 %shl, 3
  %r = or i8 %lshr, 224 ; 0b1110_0000
  ret i8 %r
}

; It is not safe to pre-shift because we demand an extra high bit.

define i8 @shl_lshr_demand3(i8 %x) {
; CHECK-LABEL: @shl_lshr_demand3(
; CHECK-NEXT:    [[SHL:%.*]] = shl i8 40, [[X:%.*]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact i8 [[SHL]], 3
; CHECK-NEXT:    [[R:%.*]] = or disjoint i8 [[LSHR]], -64
; CHECK-NEXT:    ret i8 [[R]]
;
  %shl = shl i8 40, %x ; 0b0010_1000
  %lshr = lshr i8 %shl, 3
  %r = or i8 %lshr, 192 ; 0b1100_0000
  ret i8 %r
}

; It is not valid to pre-shift because we lose the low bit of 44.

define i8 @shl_lshr_demand4(i8 %x) {
; CHECK-LABEL: @shl_lshr_demand4(
; CHECK-NEXT:    [[SHL:%.*]] = shl i8 44, [[X:%.*]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr i8 [[SHL]], 3
; CHECK-NEXT:    [[R:%.*]] = or disjoint i8 [[LSHR]], -32
; CHECK-NEXT:    ret i8 [[R]]
;
  %shl = shl i8 44, %x ; 0b0010_1100
  %lshr = lshr i8 %shl, 3
  %r = or i8 %lshr, 224 ; 0b1110_0000
  ret i8 %r
}

; Splat vectors work too, and we don't care what instruction reduces demand for high bits.

define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> splat (i8 37), [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
; CHECK-NEXT:    ret <2 x i6> [[R]]
;
  %shl = shl <2 x i8> <i8 148, i8 148>, %x ; 0b1001_0100
  %lshr = lshr <2 x i8> %shl, <i8 2, i8 2>
  %r = trunc <2 x i8> %lshr to <2 x i6>
  ret <2 x i6> %r
}
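
; In @shl_lshr_demand5 above: 148 == 37 << 2, and the trunc to i6 demands only
; the low 6 bits, so the lshr disappears after pre-shifting the constant to 37.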

; TODO: allow undef/poison elements for this transform.

define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_undef_left(
; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 undef, i8 -108>, [[X:%.*]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], splat (i8 2)
; CHECK-NEXT:    [[R:%.*]] = trunc nuw <2 x i8> [[LSHR]] to <2 x i6>
; CHECK-NEXT:    ret <2 x i6> [[R]]
;
  %shl = shl <2 x i8> <i8 undef, i8 148>, %x ; 0b1001_0100
  %lshr = lshr <2 x i8> %shl, <i8 2, i8 2>
  %r = trunc <2 x i8> %lshr to <2 x i6>
  ret <2 x i6> %r
}

; TODO: allow undef/poison elements for this transform.

define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_undef_right(
; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> splat (i8 -108), [[X:%.*]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], <i8 undef, i8 2>
; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
; CHECK-NEXT:    ret <2 x i6> [[R]]
;
  %shl = shl <2 x i8> <i8 148, i8 148>, %x ; 0b1001_0100
  %lshr = lshr <2 x i8> %shl, <i8 undef, i8 2>
  %r = trunc <2 x i8> %lshr to <2 x i6>
  ret <2 x i6> %r
}

; TODO: allow non-splat vector constants.

define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_left(
; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> splat (i8 -108), [[X:%.*]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], <i8 1, i8 2>
; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
; CHECK-NEXT:    ret <2 x i6> [[R]]
;
  %shl = shl <2 x i8> <i8 148, i8 148>, %x ; 0b1001_0100
  %lshr = lshr <2 x i8> %shl, <i8 1, i8 2>
  %r = trunc <2 x i8> %lshr to <2 x i6>
  ret <2 x i6> %r
}

; non-splat shl constant is ok.

define <2 x i6> @shl_lshr_demand5_nonuniform_vec_right(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_right(
; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 36>, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
; CHECK-NEXT:    ret <2 x i6> [[R]]
;
  %shl = shl <2 x i8> <i8 148, i8 144>, %x ; 0b1001_0100, 0b1001_0000
  %lshr = lshr <2 x i8> %shl, <i8 2, i8 2>
  %r = trunc <2 x i8> %lshr to <2 x i6>
  ret <2 x i6> %r
}

; This is possible, but may require significant changes to the demanded bits framework.

define <2 x i6> @shl_lshr_demand5_nonuniform_vec_both(<2 x i8> %x) {
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_both(
; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 -104, i8 -108>, [[X:%.*]]
; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], <i8 3, i8 2>
; CHECK-NEXT:    [[R:%.*]] = trunc nuw <2 x i8> [[LSHR]] to <2 x i6>
; CHECK-NEXT:    ret <2 x i6> [[R]]
;
  %shl = shl <2 x i8> <i8 152, i8 148>, %x ; 0b1001_1000, 0b1001_0100
  %lshr = lshr <2 x i8> %shl, <i8 3, i8 2>
  %r = trunc <2 x i8> %lshr to <2 x i6>
  ret <2 x i6> %r
}

; 'and' can reduce demand for high bits too.

define i16 @shl_lshr_demand6(i16 %x) {
; CHECK-LABEL: @shl_lshr_demand6(
; CHECK-NEXT:    [[TMP1:%.*]] = shl i16 2057, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = and i16 [[TMP1]], 4094
; CHECK-NEXT:    ret i16 [[R]]
;
  %shl = shl i16 32912, %x ; 0b1000_0000_1001_0000
  %lshr = lshr i16 %shl, 4
  %r = and i16 %lshr, 4094 ; 0b0000_1111_1111_1110
  ret i16 %r
}

; Pre-shift a constant to eliminate shl.

define i8 @lshr_shl_demand1(i8 %x) {
; CHECK-LABEL: @lshr_shl_demand1(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 -32, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = or i8 [[TMP1]], 7
; CHECK-NEXT:    ret i8 [[R]]
;
  %shr = lshr i8 28, %x ; 0b0001_1100
  %shl = shl i8 %shr, 3
  %r = or i8 %shl, 7 ; 0b0000_0111
  ret i8 %r
}

; Extra use on lshr is ok and 'and' is another demand limiter.

define i8 @lshr_shl_demand2(i8 %x) {
; CHECK-LABEL: @lshr_shl_demand2(
; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 28, [[X:%.*]]
; CHECK-NEXT:    call void @use8(i8 [[SHR]])
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i8 -32, [[X]]
; CHECK-NEXT:    [[R:%.*]] = and i8 [[TMP1]], -16
; CHECK-NEXT:    ret i8 [[R]]
;
  %shr = lshr i8 28, %x ; 0b0001_1100
  call void @use8(i8 %shr)
  %shl = shl i8 %shr, 3
  %r = and i8 %shl, -16 ; 0b1111_0000
  ret i8 %r
}
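
; In the two tests above: 28 << 3 == 224 (-32 as i8), so (28 u>> %x) << 3 ==
; (-32 u>> %x) & -8, and the 'or 7' / 'and -16' never demand those low 3 bits.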

; It is not safe to pre-shift because we demand an extra low bit.

define i8 @lshr_shl_demand3(i8 %x) {
; CHECK-LABEL: @lshr_shl_demand3(
; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 28, [[X:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i8 [[SHR]], 3
; CHECK-NEXT:    [[R:%.*]] = or disjoint i8 [[SHL]], 3
; CHECK-NEXT:    ret i8 [[R]]
;
  %shr = lshr i8 28, %x ; 0b0001_1100
  %shl = shl i8 %shr, 3
  %r = or i8 %shl, 3 ; 0b0000_0011
  ret i8 %r
}

; It is not valid to pre-shift because we lose the high bit of 60.

define i8 @lshr_shl_demand4(i8 %x) {
; CHECK-LABEL: @lshr_shl_demand4(
; CHECK-NEXT:    [[SHR:%.*]] = lshr i8 60, [[X:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[SHR]], 3
; CHECK-NEXT:    [[R:%.*]] = or disjoint i8 [[SHL]], 7
; CHECK-NEXT:    ret i8 [[R]]
;
  %shr = lshr i8 60, %x ; 0b0011_1100
  %shl = shl i8 %shr, 3
  %r = or i8 %shl, 7 ; 0b0000_0111
  ret i8 %r
}

; Splat vectors work too.

define <2 x i8> @lshr_shl_demand5(<2 x i8> %x) {
; CHECK-LABEL: @lshr_shl_demand5(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> splat (i8 -76), [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[TMP1]], splat (i8 108)
; CHECK-NEXT:    ret <2 x i8> [[R]]
;
  %shr = lshr <2 x i8> <i8 45, i8 45>, %x ; 0b0010_1101
  %shl = shl <2 x i8> %shr, <i8 2, i8 2>
  %r = and <2 x i8> %shl, <i8 108, i8 108> ; 0b0110_1100
  ret <2 x i8> %r
}

; TODO: allow undef/poison elements for this transform.

define <2 x i8> @lshr_shl_demand5_undef_left(<2 x i8> %x) {
; CHECK-LABEL: @lshr_shl_demand5_undef_left(
; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> splat (i8 45), [[X:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> [[SHR]], <i8 undef, i8 2>
; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108)
; CHECK-NEXT:    ret <2 x i8> [[R]]
;
  %shr = lshr <2 x i8> <i8 45, i8 45>, %x ; 0b0010_1101
  %shl = shl <2 x i8> %shr, <i8 undef, i8 2>
  %r = and <2 x i8> %shl, <i8 108, i8 108> ; 0b0110_1100
  ret <2 x i8> %r
}

; TODO: allow undef/poison elements for this transform.

define <2 x i8> @lshr_shl_demand5_undef_right(<2 x i8> %x) {
; CHECK-LABEL: @lshr_shl_demand5_undef_right(
; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> <i8 undef, i8 45>, [[X:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> [[SHR]], splat (i8 2)
; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108)
; CHECK-NEXT:    ret <2 x i8> [[R]]
;
  %shr = lshr <2 x i8> <i8 undef, i8 45>, %x ; 0b0010_1101
  %shl = shl <2 x i8> %shr, <i8 2, i8 2>
  %r = and <2 x i8> %shl, <i8 108, i8 108> ; 0b0110_1100
  ret <2 x i8> %r
}

; TODO: allow non-splat vector constants.

define <2 x i8> @lshr_shl_demand5_nonuniform_vec_left(<2 x i8> %x) {
; CHECK-LABEL: @lshr_shl_demand5_nonuniform_vec_left(
; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> splat (i8 45), [[X:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> [[SHR]], <i8 1, i8 2>
; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108)
; CHECK-NEXT:    ret <2 x i8> [[R]]
;
  %shr = lshr <2 x i8> <i8 45, i8 45>, %x ; 0b0010_1101
  %shl = shl <2 x i8> %shr, <i8 1, i8 2>
  %r = and <2 x i8> %shl, <i8 108, i8 108> ; 0b0110_1100
  ret <2 x i8> %r
}

; non-splat lshr constant is ok.
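; Each lane is pre-shifted independently below: 45 << 2 == -76 (0b1011_0100)
; and 13 << 2 == 52 as i8.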

define <2 x i8> @lshr_shl_demand5_nonuniform_vec_right(<2 x i8> %x) {
; CHECK-LABEL: @lshr_shl_demand5_nonuniform_vec_right(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> <i8 -76, i8 52>, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[TMP1]], splat (i8 108)
; CHECK-NEXT:    ret <2 x i8> [[R]]
;
  %shr = lshr <2 x i8> <i8 45, i8 13>, %x ; 0b0010_1101, 0b0000_1101
  %shl = shl <2 x i8> %shr, <i8 2, i8 2>
  %r = and <2 x i8> %shl, <i8 108, i8 108> ; 0b0110_1100
  ret <2 x i8> %r
}

; This is possible, but may require significant changes to the demanded bits framework.

define <2 x i8> @lshr_shl_demand5_nonuniform_vec_both(<2 x i8> %x) {
; CHECK-LABEL: @lshr_shl_demand5_nonuniform_vec_both(
; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> <i8 45, i8 13>, [[X:%.*]]
; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> [[SHR]], <i8 2, i8 4>
; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[SHL]], <i8 -4, i8 -16>
; CHECK-NEXT:    ret <2 x i8> [[R]]
;
  %shr = lshr <2 x i8> <i8 45, i8 13>, %x ; 0b0010_1101, 0b0000_1101
  %shl = shl <2 x i8> %shr, <i8 2, i8 4>
  %r = and <2 x i8> %shl, <i8 -4, i8 -16>
  ret <2 x i8> %r
}

@g = external global i8, align 8

define i64 @ashr_ashr_constexpr() {
; CHECK-LABEL: @ashr_ashr_constexpr(
; CHECK-NEXT:    [[SHR2:%.*]] = ashr exact i64 ptrtoint (ptr @g to i64), 3
; CHECK-NEXT:    ret i64 [[SHR2]]
;
  %shr = ashr i64 ptrtoint (ptr @g to i64), 1
  %shr2 = ashr i64 %shr, 2
  ret i64 %shr2
}
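
; Note: in @ashr_ashr_constexpr the shift amounts combine (1 + 2 == 3), and
; 'exact' can be inferred because the align 8 on @g guarantees that the low 3
; bits of its address are zero.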