1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -mtriple=arm64-apple=ios -S -o - %s | FileCheck --check-prefix=OPT %s 3; RUN: llc < %s -mtriple=arm64-eabi | FileCheck --check-prefix=LLC %s 4 5%struct.X = type { i8, i8, [2 x i8] } 6%struct.Y = type { i32, i8 } 7%struct.Z = type { i8, i8, [2 x i8], i16 } 8%struct.A = type { i64, i8 } 9 10define void @foo(ptr nocapture %x, ptr nocapture %y) nounwind optsize ssp { 11; LLC-LABEL: foo: 12; LLC: // %bb.0: 13; LLC-NEXT: ldr w8, [x0] 14; LLC-NEXT: ubfx w8, w8, #3, #1 15; LLC-NEXT: strb w8, [x1, #4] 16; LLC-NEXT: ret 17; OPT-LABEL: @foo( 18; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr [[X:%.*]], align 4 19; OPT-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_Y:%.*]], ptr [[Y:%.*]], i64 0, i32 1 20; OPT-NEXT: [[BF_CLEAR:%.*]] = lshr i32 [[TMP1]], 3 21; OPT-NEXT: [[BF_CLEAR_LOBIT:%.*]] = and i32 [[BF_CLEAR]], 1 22; OPT-NEXT: [[FROMBOOL:%.*]] = trunc i32 [[BF_CLEAR_LOBIT]] to i8 23; OPT-NEXT: store i8 [[FROMBOOL]], ptr [[B]], align 1 24; OPT-NEXT: ret void 25; 26 %tmp1 = load i32, ptr %x, align 4 27 %b = getelementptr inbounds %struct.Y, ptr %y, i64 0, i32 1 28 %bf.clear = lshr i32 %tmp1, 3 29 %bf.clear.lobit = and i32 %bf.clear, 1 30 %frombool = trunc i32 %bf.clear.lobit to i8 31 store i8 %frombool, ptr %b, align 1 32 ret void 33} 34 35define i32 @baz(i64 %cav1.coerce) nounwind { 36; LLC-LABEL: baz: 37; LLC: // %bb.0: 38; LLC-NEXT: sbfx w0, w0, #0, #4 39; LLC-NEXT: ret 40; OPT-LABEL: @baz( 41; OPT-NEXT: [[TMP:%.*]] = trunc i64 [[CAV1_COERCE:%.*]] to i32 42; OPT-NEXT: [[TMP1:%.*]] = shl i32 [[TMP]], 28 43; OPT-NEXT: [[BF_VAL_SEXT:%.*]] = ashr exact i32 [[TMP1]], 28 44; OPT-NEXT: ret i32 [[BF_VAL_SEXT]] 45; 46 %tmp = trunc i64 %cav1.coerce to i32 47 %tmp1 = shl i32 %tmp, 28 48 %bf.val.sext = ashr exact i32 %tmp1, 28 49 ret i32 %bf.val.sext 50} 51 52define i32 @bar(i64 %cav1.coerce) nounwind { 53; LLC-LABEL: bar: 54; LLC: // %bb.0: 55; 
LLC-NEXT: sbfx w0, w0, #4, #6 56; LLC-NEXT: ret 57; OPT-LABEL: @bar( 58; OPT-NEXT: [[TMP:%.*]] = trunc i64 [[CAV1_COERCE:%.*]] to i32 59; OPT-NEXT: [[CAV1_SROA_0_1_INSERT:%.*]] = shl i32 [[TMP]], 22 60; OPT-NEXT: [[TMP1:%.*]] = ashr i32 [[CAV1_SROA_0_1_INSERT]], 26 61; OPT-NEXT: ret i32 [[TMP1]] 62; 63 %tmp = trunc i64 %cav1.coerce to i32 64 %cav1.sroa.0.1.insert = shl i32 %tmp, 22 65 %tmp1 = ashr i32 %cav1.sroa.0.1.insert, 26 66 ret i32 %tmp1 67} 68 69define void @fct1(ptr nocapture %x, ptr nocapture %y) nounwind optsize ssp { 70; LLC-LABEL: fct1: 71; LLC: // %bb.0: 72; LLC-NEXT: ldr x8, [x0] 73; LLC-NEXT: ubfx x8, x8, #3, #1 74; LLC-NEXT: str x8, [x1] 75; LLC-NEXT: ret 76; OPT-LABEL: @fct1( 77; OPT-NEXT: [[TMP1:%.*]] = load i64, ptr [[X:%.*]], align 4 78; OPT-NEXT: [[BF_CLEAR:%.*]] = lshr i64 [[TMP1]], 3 79; OPT-NEXT: [[BF_CLEAR_LOBIT:%.*]] = and i64 [[BF_CLEAR]], 1 80; OPT-NEXT: store i64 [[BF_CLEAR_LOBIT]], ptr [[Y:%.*]], align 8 81; OPT-NEXT: ret void 82; 83 %tmp1 = load i64, ptr %x, align 4 84 %bf.clear = lshr i64 %tmp1, 3 85 %bf.clear.lobit = and i64 %bf.clear, 1 86 store i64 %bf.clear.lobit, ptr %y, align 8 87 ret void 88} 89 90define i64 @fct2(i64 %cav1.coerce) nounwind { 91; LLC-LABEL: fct2: 92; LLC: // %bb.0: 93; LLC-NEXT: sbfx x0, x0, #0, #36 94; LLC-NEXT: ret 95; OPT-LABEL: @fct2( 96; OPT-NEXT: [[TMP:%.*]] = shl i64 [[CAV1_COERCE:%.*]], 28 97; OPT-NEXT: [[BF_VAL_SEXT:%.*]] = ashr exact i64 [[TMP]], 28 98; OPT-NEXT: ret i64 [[BF_VAL_SEXT]] 99; 100 %tmp = shl i64 %cav1.coerce, 28 101 %bf.val.sext = ashr exact i64 %tmp, 28 102 ret i64 %bf.val.sext 103} 104 105define i64 @fct3(i64 %cav1.coerce) nounwind { 106; LLC-LABEL: fct3: 107; LLC: // %bb.0: 108; LLC-NEXT: sbfx x0, x0, #4, #38 109; LLC-NEXT: ret 110; OPT-LABEL: @fct3( 111; OPT-NEXT: [[CAV1_SROA_0_1_INSERT:%.*]] = shl i64 [[CAV1_COERCE:%.*]], 22 112; OPT-NEXT: [[TMP1:%.*]] = ashr i64 [[CAV1_SROA_0_1_INSERT]], 26 113; OPT-NEXT: ret i64 [[TMP1]] 114; 115 %cav1.sroa.0.1.insert = shl i64 %cav1.coerce, 22 
116 %tmp1 = ashr i64 %cav1.sroa.0.1.insert, 26 117 ret i64 %tmp1 118} 119 120define void @fct4(ptr nocapture %y, i64 %x) nounwind optsize inlinehint ssp { 121; LLC-LABEL: fct4: 122; LLC: // %bb.0: // %entry 123; LLC-NEXT: ldr x8, [x0] 124; LLC-NEXT: bfxil x8, x1, #16, #24 125; LLC-NEXT: str x8, [x0] 126; LLC-NEXT: ret 127; OPT-LABEL: @fct4( 128; OPT-NEXT: entry: 129; OPT-NEXT: [[TMP0:%.*]] = load i64, ptr [[Y:%.*]], align 8 130; OPT-NEXT: [[AND:%.*]] = and i64 [[TMP0]], -16777216 131; OPT-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], 16 132; OPT-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 16777215 133; OPT-NEXT: [[OR:%.*]] = or i64 [[AND]], [[AND1]] 134; OPT-NEXT: store i64 [[OR]], ptr [[Y]], align 8 135; OPT-NEXT: ret void 136; 137entry: 138 %0 = load i64, ptr %y, align 8 139 %and = and i64 %0, -16777216 140 %shr = lshr i64 %x, 16 141 %and1 = and i64 %shr, 16777215 142 %or = or i64 %and, %and1 143 store i64 %or, ptr %y, align 8 144 ret void 145} 146 147define void @fct5(ptr nocapture %y, i32 %x) nounwind optsize inlinehint ssp { 148; LLC-LABEL: fct5: 149; LLC: // %bb.0: // %entry 150; LLC-NEXT: ldr w8, [x0] 151; LLC-NEXT: bfxil w8, w1, #16, #3 152; LLC-NEXT: str w8, [x0] 153; LLC-NEXT: ret 154; OPT-LABEL: @fct5( 155; OPT-NEXT: entry: 156; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y:%.*]], align 8 157; OPT-NEXT: [[AND:%.*]] = and i32 [[TMP0]], -8 158; OPT-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 16 159; OPT-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 7 160; OPT-NEXT: [[OR:%.*]] = or i32 [[AND]], [[AND1]] 161; OPT-NEXT: store i32 [[OR]], ptr [[Y]], align 8 162; OPT-NEXT: ret void 163; 164entry: 165 %0 = load i32, ptr %y, align 8 166 %and = and i32 %0, -8 167 %shr = lshr i32 %x, 16 168 %and1 = and i32 %shr, 7 169 %or = or i32 %and, %and1 170 store i32 %or, ptr %y, align 8 171 ret void 172} 173 174; Check if we can still catch bfm instruction when we drop some low bits 175define void @fct6(ptr nocapture %y, i32 %x) nounwind optsize inlinehint ssp { 176; LLC-LABEL: fct6: 177; LLC: // 
%bb.0: // %entry 178; LLC-NEXT: ldr w8, [x0] 179; LLC-NEXT: bfxil w8, w1, #16, #3 180; LLC-NEXT: lsr w8, w8, #2 181; LLC-NEXT: str w8, [x0] 182; LLC-NEXT: ret 183; OPT-LABEL: @fct6( 184; OPT-NEXT: entry: 185; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y:%.*]], align 8 186; OPT-NEXT: [[AND:%.*]] = and i32 [[TMP0]], -8 187; OPT-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 16 188; OPT-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 7 189; OPT-NEXT: [[OR:%.*]] = or i32 [[AND]], [[AND1]] 190; OPT-NEXT: [[SHR1:%.*]] = lshr i32 [[OR]], 2 191; OPT-NEXT: store i32 [[SHR1]], ptr [[Y]], align 8 192; OPT-NEXT: ret void 193; 194entry: 195; lsr is an alias of ubfm 196 %0 = load i32, ptr %y, align 8 197 %and = and i32 %0, -8 198 %shr = lshr i32 %x, 16 199 %and1 = and i32 %shr, 7 200 %or = or i32 %and, %and1 201 %shr1 = lshr i32 %or, 2 202 store i32 %shr1, ptr %y, align 8 203 ret void 204} 205 206 207; Check if we can still catch bfm instruction when we drop some high bits 208define void @fct7(ptr nocapture %y, i32 %x) nounwind optsize inlinehint ssp { 209; LLC-LABEL: fct7: 210; LLC: // %bb.0: // %entry 211; LLC-NEXT: ldr w8, [x0] 212; LLC-NEXT: bfxil w8, w1, #16, #3 213; LLC-NEXT: lsl w8, w8, #2 214; LLC-NEXT: str w8, [x0] 215; LLC-NEXT: ret 216; OPT-LABEL: @fct7( 217; OPT-NEXT: entry: 218; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y:%.*]], align 8 219; OPT-NEXT: [[AND:%.*]] = and i32 [[TMP0]], -8 220; OPT-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 16 221; OPT-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 7 222; OPT-NEXT: [[OR:%.*]] = or i32 [[AND]], [[AND1]] 223; OPT-NEXT: [[SHL:%.*]] = shl i32 [[OR]], 2 224; OPT-NEXT: store i32 [[SHL]], ptr [[Y]], align 8 225; OPT-NEXT: ret void 226; 227entry: 228; lsl is an alias of ubfm 229 %0 = load i32, ptr %y, align 8 230 %and = and i32 %0, -8 231 %shr = lshr i32 %x, 16 232 %and1 = and i32 %shr, 7 233 %or = or i32 %and, %and1 234 %shl = shl i32 %or, 2 235 store i32 %shl, ptr %y, align 8 236 ret void 237} 238 239 240; Check if we can still catch bfm instruction when we 
drop some low bits 241; (i64 version) 242define void @fct8(ptr nocapture %y, i64 %x) nounwind optsize inlinehint ssp { 243; LLC-LABEL: fct8: 244; LLC: // %bb.0: // %entry 245; LLC-NEXT: ldr x8, [x0] 246; LLC-NEXT: bfxil x8, x1, #16, #3 247; LLC-NEXT: lsr x8, x8, #2 248; LLC-NEXT: str x8, [x0] 249; LLC-NEXT: ret 250; OPT-LABEL: @fct8( 251; OPT-NEXT: entry: 252; OPT-NEXT: [[TMP0:%.*]] = load i64, ptr [[Y:%.*]], align 8 253; OPT-NEXT: [[AND:%.*]] = and i64 [[TMP0]], -8 254; OPT-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], 16 255; OPT-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 7 256; OPT-NEXT: [[OR:%.*]] = or i64 [[AND]], [[AND1]] 257; OPT-NEXT: [[SHR1:%.*]] = lshr i64 [[OR]], 2 258; OPT-NEXT: store i64 [[SHR1]], ptr [[Y]], align 8 259; OPT-NEXT: ret void 260; 261entry: 262; lsr is an alias of ubfm 263 %0 = load i64, ptr %y, align 8 264 %and = and i64 %0, -8 265 %shr = lshr i64 %x, 16 266 %and1 = and i64 %shr, 7 267 %or = or i64 %and, %and1 268 %shr1 = lshr i64 %or, 2 269 store i64 %shr1, ptr %y, align 8 270 ret void 271} 272 273 274; Check if we can still catch bfm instruction when we drop some high bits 275; (i64 version) 276define void @fct9(ptr nocapture %y, i64 %x) nounwind optsize inlinehint ssp { 277; LLC-LABEL: fct9: 278; LLC: // %bb.0: // %entry 279; LLC-NEXT: ldr x8, [x0] 280; LLC-NEXT: bfxil x8, x1, #16, #3 281; LLC-NEXT: lsl x8, x8, #2 282; LLC-NEXT: str x8, [x0] 283; LLC-NEXT: ret 284; OPT-LABEL: @fct9( 285; OPT-NEXT: entry: 286; OPT-NEXT: [[TMP0:%.*]] = load i64, ptr [[Y:%.*]], align 8 287; OPT-NEXT: [[AND:%.*]] = and i64 [[TMP0]], -8 288; OPT-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], 16 289; OPT-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 7 290; OPT-NEXT: [[OR:%.*]] = or i64 [[AND]], [[AND1]] 291; OPT-NEXT: [[SHL:%.*]] = shl i64 [[OR]], 2 292; OPT-NEXT: store i64 [[SHL]], ptr [[Y]], align 8 293; OPT-NEXT: ret void 294; 295entry: 296; lsl is an alias of ubfm 297 %0 = load i64, ptr %y, align 8 298 %and = and i64 %0, -8 299 %shr = lshr i64 %x, 16 300 %and1 = and i64 %shr, 7 
301 %or = or i64 %and, %and1 302 %shl = shl i64 %or, 2 303 store i64 %shl, ptr %y, align 8 304 ret void 305} 306 307; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr) 308; (i32 version) 309define void @fct10(ptr nocapture %y, i32 %x) nounwind optsize inlinehint ssp { 310; LLC-LABEL: fct10: 311; LLC: // %bb.0: // %entry 312; LLC-NEXT: ldr w8, [x0] 313; LLC-NEXT: bfxil w8, w1, #0, #3 314; LLC-NEXT: lsl w8, w8, #2 315; LLC-NEXT: str w8, [x0] 316; LLC-NEXT: ret 317; OPT-LABEL: @fct10( 318; OPT-NEXT: entry: 319; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y:%.*]], align 8 320; OPT-NEXT: [[AND:%.*]] = and i32 [[TMP0]], -8 321; OPT-NEXT: [[AND1:%.*]] = and i32 [[X:%.*]], 7 322; OPT-NEXT: [[OR:%.*]] = or i32 [[AND]], [[AND1]] 323; OPT-NEXT: [[SHL:%.*]] = shl i32 [[OR]], 2 324; OPT-NEXT: store i32 [[SHL]], ptr [[Y]], align 8 325; OPT-NEXT: ret void 326; 327entry: 328; lsl is an alias of ubfm 329 %0 = load i32, ptr %y, align 8 330 %and = and i32 %0, -8 331 %and1 = and i32 %x, 7 332 %or = or i32 %and, %and1 333 %shl = shl i32 %or, 2 334 store i32 %shl, ptr %y, align 8 335 ret void 336} 337 338; Check if we can catch bfm instruction when lsb is 0 (i.e., no lshr) 339; (i64 version) 340define void @fct11(ptr nocapture %y, i64 %x) nounwind optsize inlinehint ssp { 341; LLC-LABEL: fct11: 342; LLC: // %bb.0: // %entry 343; LLC-NEXT: ldr x8, [x0] 344; LLC-NEXT: bfxil x8, x1, #0, #3 345; LLC-NEXT: lsl x8, x8, #2 346; LLC-NEXT: str x8, [x0] 347; LLC-NEXT: ret 348; OPT-LABEL: @fct11( 349; OPT-NEXT: entry: 350; OPT-NEXT: [[TMP0:%.*]] = load i64, ptr [[Y:%.*]], align 8 351; OPT-NEXT: [[AND:%.*]] = and i64 [[TMP0]], -8 352; OPT-NEXT: [[AND1:%.*]] = and i64 [[X:%.*]], 7 353; OPT-NEXT: [[OR:%.*]] = or i64 [[AND]], [[AND1]] 354; OPT-NEXT: [[SHL:%.*]] = shl i64 [[OR]], 2 355; OPT-NEXT: store i64 [[SHL]], ptr [[Y]], align 8 356; OPT-NEXT: ret void 357; 358entry: 359; lsl is an alias of ubfm 360 %0 = load i64, ptr %y, align 8 361 %and = and i64 %0, -8 362 %and1 = and i64 %x, 7 
363 %or = or i64 %and, %and1 364 %shl = shl i64 %or, 2 365 store i64 %shl, ptr %y, align 8 366 ret void 367} 368 369define zeroext i1 @fct12bis(i32 %tmp2) unnamed_addr nounwind ssp align 2 { 370; LLC-LABEL: fct12bis: 371; LLC: // %bb.0: 372; LLC-NEXT: ubfx w0, w0, #11, #1 373; LLC-NEXT: ret 374; OPT-LABEL: @fct12bis( 375; OPT-NEXT: [[AND_I_I:%.*]] = and i32 [[TMP2:%.*]], 2048 376; OPT-NEXT: [[TOBOOL_I_I:%.*]] = icmp ne i32 [[AND_I_I]], 0 377; OPT-NEXT: ret i1 [[TOBOOL_I_I]] 378; 379 %and.i.i = and i32 %tmp2, 2048 380 %tobool.i.i = icmp ne i32 %and.i.i, 0 381 ret i1 %tobool.i.i 382} 383 384; Check if we can still catch bfm instruction when we drop some high bits 385; and some low bits 386define void @fct12(ptr nocapture %y, i32 %x) nounwind optsize inlinehint ssp { 387; LLC-LABEL: fct12: 388; LLC: // %bb.0: // %entry 389; LLC-NEXT: ldr w8, [x0] 390; LLC-NEXT: bfxil w8, w1, #16, #3 391; LLC-NEXT: ubfx w8, w8, #2, #28 392; LLC-NEXT: str w8, [x0] 393; LLC-NEXT: ret 394; OPT-LABEL: @fct12( 395; OPT-NEXT: entry: 396; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y:%.*]], align 8 397; OPT-NEXT: [[AND:%.*]] = and i32 [[TMP0]], -8 398; OPT-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 16 399; OPT-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 7 400; OPT-NEXT: [[OR:%.*]] = or i32 [[AND]], [[AND1]] 401; OPT-NEXT: [[SHL:%.*]] = shl i32 [[OR]], 2 402; OPT-NEXT: [[SHR2:%.*]] = lshr i32 [[SHL]], 4 403; OPT-NEXT: store i32 [[SHR2]], ptr [[Y]], align 8 404; OPT-NEXT: ret void 405; 406entry: 407; lsr is an alias of ubfm 408 %0 = load i32, ptr %y, align 8 409 %and = and i32 %0, -8 410 %shr = lshr i32 %x, 16 411 %and1 = and i32 %shr, 7 412 %or = or i32 %and, %and1 413 %shl = shl i32 %or, 2 414 %shr2 = lshr i32 %shl, 4 415 store i32 %shr2, ptr %y, align 8 416 ret void 417} 418define void @fct12_mask(ptr nocapture %y, i32 %x) nounwind optsize inlinehint ssp { 419; LLC-LABEL: fct12_mask: 420; LLC: // %bb.0: // %entry 421; LLC-NEXT: ldr w8, [x0] 422; LLC-NEXT: and w8, w8, #0x3ffffff8 423; LLC-NEXT: bfxil w8, 
w1, #16, #3 424; LLC-NEXT: lsr w8, w8, #2 425; LLC-NEXT: str w8, [x0] 426; LLC-NEXT: ret 427; OPT-LABEL: @fct12_mask( 428; OPT-NEXT: entry: 429; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y:%.*]], align 8 430; OPT-NEXT: [[AND:%.*]] = and i32 [[TMP0]], -8 431; OPT-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 16 432; OPT-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 7 433; OPT-NEXT: [[OR:%.*]] = or i32 [[AND]], [[AND1]] 434; OPT-NEXT: [[LSHR:%.*]] = lshr i32 [[OR]], 2 435; OPT-NEXT: [[MASK:%.*]] = and i32 [[LSHR]], 268435455 436; OPT-NEXT: store i32 [[MASK]], ptr [[Y]], align 8 437; OPT-NEXT: ret void 438; 439entry: 440; lsr is an alias of ubfm 441 %0 = load i32, ptr %y, align 8 442 %and = and i32 %0, -8 443 %shr = lshr i32 %x, 16 444 %and1 = and i32 %shr, 7 445 %or = or i32 %and, %and1 446 %lshr = lshr i32 %or, 2 447 %mask = and i32 %lshr, 268435455 448 store i32 %mask, ptr %y, align 8 449 ret void 450} 451 452; Check if we can still catch bfm instruction when we drop some high bits 453; and some low bits 454; (i64 version) 455define void @fct13(ptr nocapture %y, i64 %x) nounwind optsize inlinehint ssp { 456; LLC-LABEL: fct13: 457; LLC: // %bb.0: // %entry 458; LLC-NEXT: ldr x8, [x0] 459; LLC-NEXT: bfxil x8, x1, #16, #3 460; LLC-NEXT: ubfx x8, x8, #2, #60 461; LLC-NEXT: str x8, [x0] 462; LLC-NEXT: ret 463; OPT-LABEL: @fct13( 464; OPT-NEXT: entry: 465; OPT-NEXT: [[TMP0:%.*]] = load i64, ptr [[Y:%.*]], align 8 466; OPT-NEXT: [[AND:%.*]] = and i64 [[TMP0]], -8 467; OPT-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], 16 468; OPT-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 7 469; OPT-NEXT: [[OR:%.*]] = or i64 [[AND]], [[AND1]] 470; OPT-NEXT: [[SHL:%.*]] = shl i64 [[OR]], 2 471; OPT-NEXT: [[SHR2:%.*]] = lshr i64 [[SHL]], 4 472; OPT-NEXT: store i64 [[SHR2]], ptr [[Y]], align 8 473; OPT-NEXT: ret void 474; 475entry: 476; lsr is an alias of ubfm 477 %0 = load i64, ptr %y, align 8 478 %and = and i64 %0, -8 479 %shr = lshr i64 %x, 16 480 %and1 = and i64 %shr, 7 481 %or = or i64 %and, %and1 482 %shl = shl 
i64 %or, 2 483 %shr2 = lshr i64 %shl, 4 484 store i64 %shr2, ptr %y, align 8 485 ret void 486} 487define void @fct13_mask(ptr nocapture %y, i64 %x) nounwind optsize inlinehint ssp { 488; LLC-LABEL: fct13_mask: 489; LLC: // %bb.0: // %entry 490; LLC-NEXT: ldr x8, [x0] 491; LLC-NEXT: and x8, x8, #0x3ffffffffffffff8 492; LLC-NEXT: bfxil x8, x1, #16, #3 493; LLC-NEXT: lsr x8, x8, #2 494; LLC-NEXT: str x8, [x0] 495; LLC-NEXT: ret 496; OPT-LABEL: @fct13_mask( 497; OPT-NEXT: entry: 498; OPT-NEXT: [[TMP0:%.*]] = load i64, ptr [[Y:%.*]], align 8 499; OPT-NEXT: [[AND:%.*]] = and i64 [[TMP0]], -8 500; OPT-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], 16 501; OPT-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 7 502; OPT-NEXT: [[OR:%.*]] = or i64 [[AND]], [[AND1]] 503; OPT-NEXT: [[LSHR:%.*]] = lshr i64 [[OR]], 2 504; OPT-NEXT: [[MASK:%.*]] = and i64 [[LSHR]], 1152921504606846975 505; OPT-NEXT: store i64 [[MASK]], ptr [[Y]], align 8 506; OPT-NEXT: ret void 507; 508entry: 509; lsr is an alias of ubfm 510 %0 = load i64, ptr %y, align 8 511 %and = and i64 %0, -8 512 %shr = lshr i64 %x, 16 513 %and1 = and i64 %shr, 7 514 %or = or i64 %and, %and1 515 %lshr = lshr i64 %or, 2 516 %mask = and i64 %lshr, 1152921504606846975 517 store i64 %mask, ptr %y, align 8 518 ret void 519} 520 521 522; Check if we can still catch bfm instruction when we drop some high bits 523; and some low bits 524define void @fct14(ptr nocapture %y, i32 %x, i32 %x1) nounwind optsize inlinehint ssp { 525; LLC-LABEL: fct14: 526; LLC: // %bb.0: // %entry 527; LLC-NEXT: ldr w8, [x0] 528; LLC-NEXT: bfxil w8, w1, #16, #8 529; LLC-NEXT: lsr w8, w8, #4 530; LLC-NEXT: bfxil w8, w2, #5, #3 531; LLC-NEXT: lsl w8, w8, #2 532; LLC-NEXT: str w8, [x0] 533; LLC-NEXT: ret 534; OPT-LABEL: @fct14( 535; OPT-NEXT: entry: 536; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y:%.*]], align 8 537; OPT-NEXT: [[AND:%.*]] = and i32 [[TMP0]], -256 538; OPT-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 16 539; OPT-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 255 540; OPT-NEXT: 
[[OR:%.*]] = or i32 [[AND]], [[AND1]] 541; OPT-NEXT: [[SHL:%.*]] = lshr i32 [[OR]], 4 542; OPT-NEXT: [[AND2:%.*]] = and i32 [[SHL]], -8 543; OPT-NEXT: [[SHR1:%.*]] = lshr i32 [[X1:%.*]], 5 544; OPT-NEXT: [[AND3:%.*]] = and i32 [[SHR1]], 7 545; OPT-NEXT: [[OR1:%.*]] = or i32 [[AND2]], [[AND3]] 546; OPT-NEXT: [[SHL1:%.*]] = shl i32 [[OR1]], 2 547; OPT-NEXT: store i32 [[SHL1]], ptr [[Y]], align 8 548; OPT-NEXT: ret void 549; 550entry: 551; lsr is an alias of ubfm 552; lsl is an alias of ubfm 553 %0 = load i32, ptr %y, align 8 554 %and = and i32 %0, -256 555 %shr = lshr i32 %x, 16 556 %and1 = and i32 %shr, 255 557 %or = or i32 %and, %and1 558 %shl = lshr i32 %or, 4 559 %and2 = and i32 %shl, -8 560 %shr1 = lshr i32 %x1, 5 561 %and3 = and i32 %shr1, 7 562 %or1 = or i32 %and2, %and3 563 %shl1 = shl i32 %or1, 2 564 store i32 %shl1, ptr %y, align 8 565 ret void 566} 567 568; Check if we can still catch bfm instruction when we drop some high bits 569; and some low bits 570; (i64 version) 571define void @fct15(ptr nocapture %y, i64 %x, i64 %x1) nounwind optsize inlinehint ssp { 572; LLC-LABEL: fct15: 573; LLC: // %bb.0: // %entry 574; LLC-NEXT: ldr x8, [x0] 575; LLC-NEXT: bfxil x8, x1, #16, #8 576; LLC-NEXT: lsr x8, x8, #4 577; LLC-NEXT: bfxil x8, x2, #5, #3 578; LLC-NEXT: lsl x8, x8, #2 579; LLC-NEXT: str x8, [x0] 580; LLC-NEXT: ret 581; OPT-LABEL: @fct15( 582; OPT-NEXT: entry: 583; OPT-NEXT: [[TMP0:%.*]] = load i64, ptr [[Y:%.*]], align 8 584; OPT-NEXT: [[AND:%.*]] = and i64 [[TMP0]], -256 585; OPT-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], 16 586; OPT-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 255 587; OPT-NEXT: [[OR:%.*]] = or i64 [[AND]], [[AND1]] 588; OPT-NEXT: [[SHL:%.*]] = lshr i64 [[OR]], 4 589; OPT-NEXT: [[AND2:%.*]] = and i64 [[SHL]], -8 590; OPT-NEXT: [[SHR1:%.*]] = lshr i64 [[X1:%.*]], 5 591; OPT-NEXT: [[AND3:%.*]] = and i64 [[SHR1]], 7 592; OPT-NEXT: [[OR1:%.*]] = or i64 [[AND2]], [[AND3]] 593; OPT-NEXT: [[SHL1:%.*]] = shl i64 [[OR1]], 2 594; OPT-NEXT: store i64 
[[SHL1]], ptr [[Y]], align 8 595; OPT-NEXT: ret void 596; 597entry: 598; lsr is an alias of ubfm 599; lsl is an alias of ubfm 600 %0 = load i64, ptr %y, align 8 601 %and = and i64 %0, -256 602 %shr = lshr i64 %x, 16 603 %and1 = and i64 %shr, 255 604 %or = or i64 %and, %and1 605 %shl = lshr i64 %or, 4 606 %and2 = and i64 %shl, -8 607 %shr1 = lshr i64 %x1, 5 608 %and3 = and i64 %shr1, 7 609 %or1 = or i64 %and2, %and3 610 %shl1 = shl i64 %or1, 2 611 store i64 %shl1, ptr %y, align 8 612 ret void 613} 614 615; Check if we can still catch bfm instruction when we drop some high bits 616; and some low bits and a masking operation has to be kept 617define void @fct16(ptr nocapture %y, i32 %x) nounwind optsize inlinehint ssp { 618; LLC-LABEL: fct16: 619; LLC: // %bb.0: // %entry 620; LLC-NEXT: ldr w8, [x0] 621; LLC-NEXT: mov w9, #33120 622; LLC-NEXT: movk w9, #26, lsl #16 623; LLC-NEXT: and w8, w8, w9 624; LLC-NEXT: bfxil w8, w1, #16, #3 625; LLC-NEXT: ubfx w8, w8, #2, #28 626; LLC-NEXT: str w8, [x0] 627; LLC-NEXT: ret 628; OPT-LABEL: @fct16( 629; OPT-NEXT: entry: 630; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y:%.*]], align 8 631; OPT-NEXT: [[AND:%.*]] = and i32 [[TMP0]], 1737056 632; OPT-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 16 633; OPT-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 7 634; OPT-NEXT: [[OR:%.*]] = or i32 [[AND]], [[AND1]] 635; OPT-NEXT: [[SHL:%.*]] = shl i32 [[OR]], 2 636; OPT-NEXT: [[SHR2:%.*]] = lshr i32 [[SHL]], 4 637; OPT-NEXT: store i32 [[SHR2]], ptr [[Y]], align 8 638; OPT-NEXT: ret void 639; 640entry: 641; Create the constant 642; Do the masking 643; lsr is an alias of ubfm 644 %0 = load i32, ptr %y, align 8 645 %and = and i32 %0, 1737056 646 %shr = lshr i32 %x, 16 647 %and1 = and i32 %shr, 7 648 %or = or i32 %and, %and1 649 %shl = shl i32 %or, 2 650 %shr2 = lshr i32 %shl, 4 651 store i32 %shr2, ptr %y, align 8 652 ret void 653} 654define void @fct16_mask(ptr nocapture %y, i32 %x) nounwind optsize inlinehint ssp { 655; LLC-LABEL: fct16_mask: 656; LLC: // 
%bb.0: // %entry 657; LLC-NEXT: ldr w8, [x0] 658; LLC-NEXT: mov w9, #33120 659; LLC-NEXT: movk w9, #26, lsl #16 660; LLC-NEXT: and w8, w8, w9 661; LLC-NEXT: bfxil w8, w1, #16, #3 662; LLC-NEXT: lsr w8, w8, #2 663; LLC-NEXT: str w8, [x0] 664; LLC-NEXT: ret 665; OPT-LABEL: @fct16_mask( 666; OPT-NEXT: entry: 667; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[Y:%.*]], align 8 668; OPT-NEXT: [[AND:%.*]] = and i32 [[TMP0]], 1737056 669; OPT-NEXT: [[SHR:%.*]] = lshr i32 [[X:%.*]], 16 670; OPT-NEXT: [[AND1:%.*]] = and i32 [[SHR]], 7 671; OPT-NEXT: [[OR:%.*]] = or i32 [[AND]], [[AND1]] 672; OPT-NEXT: [[LSHR:%.*]] = lshr i32 [[OR]], 2 673; OPT-NEXT: [[MASK:%.*]] = and i32 [[LSHR]], 268435455 674; OPT-NEXT: store i32 [[MASK]], ptr [[Y]], align 8 675; OPT-NEXT: ret void 676; 677entry: 678; Create the constant 679; Do the masking 680; lsr is an alias of ubfm 681 %0 = load i32, ptr %y, align 8 682 %and = and i32 %0, 1737056 683 %shr = lshr i32 %x, 16 684 %and1 = and i32 %shr, 7 685 %or = or i32 %and, %and1 686 %lshr = lshr i32 %or, 2 687 %mask = and i32 %lshr, 268435455 688 store i32 %mask, ptr %y, align 8 689 ret void 690} 691 692 693; Check if we can still catch bfm instruction when we drop some high bits 694; and some low bits and a masking operation has to be kept 695; (i64 version) 696define void @fct17(ptr nocapture %y, i64 %x) nounwind optsize inlinehint ssp { 697; LLC-LABEL: fct17: 698; LLC: // %bb.0: // %entry 699; LLC-NEXT: ldr x8, [x0] 700; LLC-NEXT: mov w9, #33120 701; LLC-NEXT: movk w9, #26, lsl #16 702; LLC-NEXT: and x8, x8, x9 703; LLC-NEXT: bfxil x8, x1, #16, #3 704; LLC-NEXT: ubfx x8, x8, #2, #60 705; LLC-NEXT: str x8, [x0] 706; LLC-NEXT: ret 707; OPT-LABEL: @fct17( 708; OPT-NEXT: entry: 709; OPT-NEXT: [[TMP0:%.*]] = load i64, ptr [[Y:%.*]], align 8 710; OPT-NEXT: [[AND:%.*]] = and i64 [[TMP0]], 1737056 711; OPT-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], 16 712; OPT-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 7 713; OPT-NEXT: [[OR:%.*]] = or i64 [[AND]], [[AND1]] 714; 
OPT-NEXT: [[SHL:%.*]] = shl i64 [[OR]], 2 715; OPT-NEXT: [[SHR2:%.*]] = lshr i64 [[SHL]], 4 716; OPT-NEXT: store i64 [[SHR2]], ptr [[Y]], align 8 717; OPT-NEXT: ret void 718; 719entry: 720; Create the constant 721; Do the masking 722; lsr is an alias of ubfm 723 %0 = load i64, ptr %y, align 8 724 %and = and i64 %0, 1737056 725 %shr = lshr i64 %x, 16 726 %and1 = and i64 %shr, 7 727 %or = or i64 %and, %and1 728 %shl = shl i64 %or, 2 729 %shr2 = lshr i64 %shl, 4 730 store i64 %shr2, ptr %y, align 8 731 ret void 732} 733define void @fct17_mask(ptr nocapture %y, i64 %x) nounwind optsize inlinehint ssp { 734; LLC-LABEL: fct17_mask: 735; LLC: // %bb.0: // %entry 736; LLC-NEXT: ldr x8, [x0] 737; LLC-NEXT: mov w9, #33120 738; LLC-NEXT: movk w9, #26, lsl #16 739; LLC-NEXT: and x8, x8, x9 740; LLC-NEXT: bfxil x8, x1, #16, #3 741; LLC-NEXT: lsr x8, x8, #2 742; LLC-NEXT: str x8, [x0] 743; LLC-NEXT: ret 744; OPT-LABEL: @fct17_mask( 745; OPT-NEXT: entry: 746; OPT-NEXT: [[TMP0:%.*]] = load i64, ptr [[Y:%.*]], align 8 747; OPT-NEXT: [[AND:%.*]] = and i64 [[TMP0]], 1737056 748; OPT-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], 16 749; OPT-NEXT: [[AND1:%.*]] = and i64 [[SHR]], 7 750; OPT-NEXT: [[OR:%.*]] = or i64 [[AND]], [[AND1]] 751; OPT-NEXT: [[LSHR:%.*]] = lshr i64 [[OR]], 2 752; OPT-NEXT: [[MASK:%.*]] = and i64 [[LSHR]], 1152921504606846975 753; OPT-NEXT: store i64 [[MASK]], ptr [[Y]], align 8 754; OPT-NEXT: ret void 755; 756entry: 757; Create the constant 758; Do the masking 759; lsr is an alias of ubfm 760 %0 = load i64, ptr %y, align 8 761 %and = and i64 %0, 1737056 762 %shr = lshr i64 %x, 16 763 %and1 = and i64 %shr, 7 764 %or = or i64 %and, %and1 765 %lshr = lshr i64 %or, 2 766 %mask = and i64 %lshr, 1152921504606846975 767 store i64 %mask, ptr %y, align 8 768 ret void 769} 770 771define i64 @fct18(i32 %xor72) nounwind ssp { 772; LLC-LABEL: fct18: 773; LLC: // %bb.0: 774; LLC-NEXT: // kill: def $w0 killed $w0 def $x0 775; LLC-NEXT: ubfx x0, x0, #9, #8 776; LLC-NEXT: ret 777; 
OPT-LABEL: @fct18( 778; OPT-NEXT: [[SHR81:%.*]] = lshr i32 [[XOR72:%.*]], 9 779; OPT-NEXT: [[CONV82:%.*]] = zext i32 [[SHR81]] to i64 780; OPT-NEXT: [[RESULT:%.*]] = and i64 [[CONV82]], 255 781; OPT-NEXT: ret i64 [[RESULT]] 782; 783 %shr81 = lshr i32 %xor72, 9 784 %conv82 = zext i32 %shr81 to i64 785 %result = and i64 %conv82, 255 786 ret i64 %result 787} 788 789; Using the access to the global array to keep the instruction and control flow. 790@first_ones = external dso_local global [65536 x i8] 791 792; Function Attrs: nounwind readonly ssp 793define i32 @fct19(i64 %arg1) nounwind readonly ssp { 794; LLC-LABEL: fct19: 795; LLC: // %bb.0: // %entry 796; LLC-NEXT: lsr x8, x0, #48 797; LLC-NEXT: cbz x8, .LBB26_2 798; LLC-NEXT: // %bb.1: // %if.then 799; LLC-NEXT: adrp x9, first_ones 800; LLC-NEXT: add x9, x9, :lo12:first_ones 801; LLC-NEXT: ldrb w0, [x9, x8] 802; LLC-NEXT: ret 803; LLC-NEXT: .LBB26_2: // %if.end 804; LLC-NEXT: ubfx x8, x0, #32, #16 805; LLC-NEXT: cbz w8, .LBB26_4 806; LLC-NEXT: // %bb.3: // %if.then7 807; LLC-NEXT: adrp x9, first_ones 808; LLC-NEXT: add x9, x9, :lo12:first_ones 809; LLC-NEXT: ldrb w8, [x9, x8] 810; LLC-NEXT: add w0, w8, #16 811; LLC-NEXT: ret 812; LLC-NEXT: .LBB26_4: // %if.end13 813; LLC-NEXT: lsr w8, w0, #16 814; LLC-NEXT: cbz w8, .LBB26_6 815; LLC-NEXT: // %bb.5: // %if.then17 816; LLC-NEXT: adrp x9, first_ones 817; LLC-NEXT: add x9, x9, :lo12:first_ones 818; LLC-NEXT: ldrb w8, [x9, x8] 819; LLC-NEXT: add w0, w8, #32 820; LLC-NEXT: ret 821; LLC-NEXT: .LBB26_6: 822; LLC-NEXT: mov w0, #64 823; LLC-NEXT: ret 824; OPT-LABEL: @fct19( 825; OPT-NEXT: entry: 826; OPT-NEXT: [[X_SROA_1_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[ARG1:%.*]], 16 827; OPT-NEXT: [[X_SROA_1_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[X_SROA_1_0_EXTRACT_SHIFT]] to i16 828; OPT-NEXT: [[X_SROA_5_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[ARG1]], 48 829; OPT-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[X_SROA_5_0_EXTRACT_SHIFT]], 0 830; OPT-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label 
[[IF_THEN:%.*]] 831; OPT: if.then: 832; OPT-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [65536 x i8], ptr @first_ones, i64 0, i64 [[X_SROA_5_0_EXTRACT_SHIFT]] 833; OPT-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1 834; OPT-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32 835; OPT-NEXT: br label [[RETURN:%.*]] 836; OPT: if.end: 837; OPT-NEXT: [[TMP1:%.*]] = lshr i64 [[ARG1]], 32 838; OPT-NEXT: [[X_SROA_3_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP1]] to i16 839; OPT-NEXT: [[TOBOOL6:%.*]] = icmp eq i16 [[X_SROA_3_0_EXTRACT_TRUNC]], 0 840; OPT-NEXT: br i1 [[TOBOOL6]], label [[IF_END13:%.*]], label [[IF_THEN7:%.*]] 841; OPT: if.then7: 842; OPT-NEXT: [[TMP2:%.*]] = lshr i64 [[ARG1]], 32 843; OPT-NEXT: [[IDXPROM10:%.*]] = and i64 [[TMP2]], 65535 844; OPT-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [65536 x i8], ptr @first_ones, i64 0, i64 [[IDXPROM10]] 845; OPT-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX11]], align 1 846; OPT-NEXT: [[CONV12:%.*]] = zext i8 [[TMP3]] to i32 847; OPT-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV12]], 16 848; OPT-NEXT: br label [[RETURN]] 849; OPT: if.end13: 850; OPT-NEXT: [[TMP4:%.*]] = lshr i64 [[ARG1]], 16 851; OPT-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i16 852; OPT-NEXT: [[TOBOOL16:%.*]] = icmp eq i16 [[TMP5]], 0 853; OPT-NEXT: br i1 [[TOBOOL16]], label [[RETURN]], label [[IF_THEN17:%.*]] 854; OPT: if.then17: 855; OPT-NEXT: [[TMP6:%.*]] = lshr i64 [[ARG1]], 16 856; OPT-NEXT: [[IDXPROM20:%.*]] = and i64 [[TMP6]], 65535 857; OPT-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [65536 x i8], ptr @first_ones, i64 0, i64 [[IDXPROM20]] 858; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX21]], align 1 859; OPT-NEXT: [[CONV22:%.*]] = zext i8 [[TMP7]] to i32 860; OPT-NEXT: [[ADD23:%.*]] = add nsw i32 [[CONV22]], 32 861; OPT-NEXT: br label [[RETURN]] 862; OPT: return: 863; OPT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CONV]], [[IF_THEN]] ], [ [[ADD]], [[IF_THEN7]] ], [ [[ADD23]], [[IF_THEN17]] ], [ 64, [[IF_END13]] ] 864; OPT-NEXT: ret 
i32 [[RETVAL_0]] 865; 866entry: 867 %x.sroa.1.0.extract.shift = lshr i64 %arg1, 16 868 %x.sroa.1.0.extract.trunc = trunc i64 %x.sroa.1.0.extract.shift to i16 869 %x.sroa.3.0.extract.shift = lshr i64 %arg1, 32 870 %x.sroa.5.0.extract.shift = lshr i64 %arg1, 48 871 %tobool = icmp eq i64 %x.sroa.5.0.extract.shift, 0 872 br i1 %tobool, label %if.end, label %if.then 873 874if.then: ; preds = %entry 875 %arrayidx3 = getelementptr inbounds [65536 x i8], ptr @first_ones, i64 0, i64 %x.sroa.5.0.extract.shift 876 %0 = load i8, ptr %arrayidx3, align 1 877 %conv = zext i8 %0 to i32 878 br label %return 879 880if.end: ; preds = %entry 881 %x.sroa.3.0.extract.trunc = trunc i64 %x.sroa.3.0.extract.shift to i16 882 %tobool6 = icmp eq i16 %x.sroa.3.0.extract.trunc, 0 883 br i1 %tobool6, label %if.end13, label %if.then7 884 885if.then7: ; preds = %if.end 886; "and" should be combined to "ubfm" while "ubfm" should be removed by cse. 887; So neither of them should be in the assembly code. 888 %idxprom10 = and i64 %x.sroa.3.0.extract.shift, 65535 889 %arrayidx11 = getelementptr inbounds [65536 x i8], ptr @first_ones, i64 0, i64 %idxprom10 890 %1 = load i8, ptr %arrayidx11, align 1 891 %conv12 = zext i8 %1 to i32 892 %add = add nsw i32 %conv12, 16 893 br label %return 894 895if.end13: ; preds = %if.end 896 %tobool16 = icmp eq i16 %x.sroa.1.0.extract.trunc, 0 897 br i1 %tobool16, label %return, label %if.then17 898 899if.then17: ; preds = %if.end13 900; "and" should be combined to "ubfm" while "ubfm" should be removed by cse. 901; So neither of them should be in the assembly code. 
902 %idxprom20 = and i64 %x.sroa.1.0.extract.shift, 65535 903 %arrayidx21 = getelementptr inbounds [65536 x i8], ptr @first_ones, i64 0, i64 %idxprom20 904 %2 = load i8, ptr %arrayidx21, align 1 905 %conv22 = zext i8 %2 to i32 906 %add23 = add nsw i32 %conv22, 32 907 br label %return 908 909return: ; preds = %if.end13, %if.then17, %if.then7, %if.then 910 %retval.0 = phi i32 [ %conv, %if.then ], [ %add, %if.then7 ], [ %add23, %if.then17 ], [ 64, %if.end13 ] 911 ret i32 %retval.0 912} 913 914; Make sure we do not assert if the immediate operand of the 'and' is wider than 64 bits (it is an i128 constant here). 915; PR19503. 916define i80 @fct20(i128 %a, i128 %b) { 917; LLC-LABEL: fct20: 918; LLC: // %bb.0: // %entry 919; LLC-NEXT: mov x12, #11776 // =0x2e00 920; LLC-NEXT: lsr x8, x1, #18 921; LLC-NEXT: extr x9, x1, x0, #18 922; LLC-NEXT: movk x12, #25856, lsl #16 923; LLC-NEXT: orr x10, x2, x3 924; LLC-NEXT: mov w11, #26220 // =0x666c 925; LLC-NEXT: movk x12, #11077, lsl #32 926; LLC-NEXT: and x11, x8, x11 927; LLC-NEXT: cmp x10, #0 928; LLC-NEXT: movk x12, #45, lsl #48 929; LLC-NEXT: csel x1, x11, x8, eq 930; LLC-NEXT: and x12, x9, x12 931; LLC-NEXT: csel x0, x12, x9, eq 932; LLC-NEXT: ret 933; OPT-LABEL: @fct20( 934; OPT-NEXT: entry: 935; OPT-NEXT: [[SHR:%.*]] = lshr i128 [[A:%.*]], 18 936; OPT-NEXT: [[CONV:%.*]] = trunc i128 [[SHR]] to i80 937; OPT-NEXT: [[TOBOOL:%.*]] = icmp eq i128 [[B:%.*]], 0 938; OPT-NEXT: br i1 [[TOBOOL]], label [[THEN:%.*]], label [[END:%.*]] 939; OPT: then: 940; OPT-NEXT: [[AND:%.*]] = and i128 [[SHR]], 483673642326615442599424 941; OPT-NEXT: [[CONV2:%.*]] = trunc i128 [[AND]] to i80 942; OPT-NEXT: br label [[END]] 943; OPT: end: 944; OPT-NEXT: [[CONV3:%.*]] = phi i80 [ [[CONV]], [[ENTRY:%.*]] ], [ [[CONV2]], [[THEN]] ] 945; OPT-NEXT: ret i80 [[CONV3]] 946; 947entry: 948 %shr = lshr i128 %a, 18 949 %conv = trunc i128 %shr to i80 950 %tobool = icmp eq i128 %b, 0 951 br i1 %tobool, label %then, label %end 952then: 953 %and = and i128 %shr, 483673642326615442599424 954 %conv2 = trunc i128 
%and to i80 955 br label %end 956end: 957 %conv3 = phi i80 [%conv, %entry], [%conv2, %then] 958 ret i80 %conv3 959} 960 961; Check if we can still catch UBFX when "AND" is used by SHL. 962@arr = external dso_local global [8 x [64 x i64]] 963define i64 @fct21(i64 %x) { 964; LLC-LABEL: fct21: 965; LLC: // %bb.0: // %entry 966; LLC-NEXT: ubfx x8, x0, #4, #4 967; LLC-NEXT: adrp x9, arr 968; LLC-NEXT: add x9, x9, :lo12:arr 969; LLC-NEXT: ldr x0, [x9, x8, lsl #3] 970; LLC-NEXT: ret 971; OPT-LABEL: @fct21( 972; OPT-NEXT: entry: 973; OPT-NEXT: [[SHR:%.*]] = lshr i64 [[X:%.*]], 4 974; OPT-NEXT: [[AND:%.*]] = and i64 [[SHR]], 15 975; OPT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x [64 x i64]], ptr @arr, i64 0, i64 0, i64 [[AND]] 976; OPT-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 977; OPT-NEXT: ret i64 [[TMP0]] 978; 979entry: 980 %shr = lshr i64 %x, 4 981 %and = and i64 %shr, 15 982 %arrayidx = getelementptr inbounds [8 x [64 x i64]], ptr @arr, i64 0, i64 0, i64 %and 983 %0 = load i64, ptr %arrayidx, align 8 984 ret i64 %0 985} 986; Insert a 4-bit field of %in at bit 3 above the low 3 bits of %dst, then OR the result with itself shifted left by 8; the llc checks expect a single BFI for the insertion. 987define i16 @test_ignored_rightbits(i32 %dst, i32 %in) { 988; LLC-LABEL: test_ignored_rightbits: 989; LLC: // %bb.0: 990; LLC-NEXT: and w8, w0, #0x7 991; LLC-NEXT: bfi w8, w1, #3, #4 992; LLC-NEXT: orr w0, w8, w8, lsl #8 993; LLC-NEXT: ret 994; OPT-LABEL: @test_ignored_rightbits( 995; OPT-NEXT: [[POSITIONED_FIELD:%.*]] = shl i32 [[IN:%.*]], 3 996; OPT-NEXT: [[POSITIONED_MASKED_FIELD:%.*]] = and i32 [[POSITIONED_FIELD]], 120 997; OPT-NEXT: [[MASKED_DST:%.*]] = and i32 [[DST:%.*]], 7 998; OPT-NEXT: [[INSERTION:%.*]] = or i32 [[MASKED_DST]], [[POSITIONED_MASKED_FIELD]] 999; OPT-NEXT: [[SHL16:%.*]] = shl i32 [[INSERTION]], 8 1000; OPT-NEXT: [[OR18:%.*]] = or i32 [[SHL16]], [[INSERTION]] 1001; OPT-NEXT: [[CONV19:%.*]] = trunc i32 [[OR18]] to i16 1002; OPT-NEXT: ret i16 [[CONV19]] 1003; 1004 %positioned_field = shl i32 %in, 3 1005 %positioned_masked_field = and i32 %positioned_field, 120 1006 %masked_dst = and i32 %dst, 7 1007 
%insertion = or i32 %masked_dst, %positioned_masked_field 1008 1009 %shl16 = shl i32 %insertion, 8 1010 %or18 = or i32 %shl16, %insertion 1011 %conv19 = trunc i32 %or18 to i16 1012 1013 ret i16 %conv19 1014} 1015 1016; The following test exercises the case where we have a BFI 1017; instruction with the same input in both operands. We need to 1018; track the useful bits through both operands. 1019define void @sameOperandBFI(i64 %src, i64 %src2, ptr %ptr) { 1020; LLC-LABEL: sameOperandBFI: 1021; LLC: // %bb.0: // %entry 1022; LLC-NEXT: cbnz wzr, .LBB30_2 1023; LLC-NEXT: // %bb.1: // %if.else 1024; LLC-NEXT: lsr x8, x0, #47 1025; LLC-NEXT: and w9, w1, #0x3 1026; LLC-NEXT: bfi w9, w8, #2, #2 1027; LLC-NEXT: orr w8, w9, w9, lsl #4 1028; LLC-NEXT: strh w8, [x2] 1029; LLC-NEXT: .LBB30_2: // %end 1030; LLC-NEXT: ret 1031; OPT-LABEL: @sameOperandBFI( 1032; OPT-NEXT: entry: 1033; OPT-NEXT: [[SHR47:%.*]] = lshr i64 [[SRC:%.*]], 47 1034; OPT-NEXT: [[SRC2_TRUNC:%.*]] = trunc i64 [[SRC2:%.*]] to i32 1035; OPT-NEXT: br i1 undef, label [[END:%.*]], label [[IF_ELSE:%.*]] 1036; OPT: if.else: 1037; OPT-NEXT: [[AND3:%.*]] = and i32 [[SRC2_TRUNC]], 3 1038; OPT-NEXT: [[SHL2:%.*]] = shl nuw nsw i64 [[SHR47]], 2 1039; OPT-NEXT: [[SHL2_TRUNC:%.*]] = trunc i64 [[SHL2]] to i32 1040; OPT-NEXT: [[AND12:%.*]] = and i32 [[SHL2_TRUNC]], 12 1041; OPT-NEXT: [[BFISOURCE:%.*]] = or i32 [[AND3]], [[AND12]] 1042; OPT-NEXT: [[BFIRHS:%.*]] = shl nuw nsw i32 [[BFISOURCE]], 4 1043; OPT-NEXT: [[BFI:%.*]] = or i32 [[BFIRHS]], [[BFISOURCE]] 1044; OPT-NEXT: [[BFITRUNC:%.*]] = trunc i32 [[BFI]] to i16 1045; OPT-NEXT: store i16 [[BFITRUNC]], ptr [[PTR:%.*]], align 4 1046; OPT-NEXT: br label [[END]] 1047; OPT: end: 1048; OPT-NEXT: ret void 1049; 1050entry: 1051 %shr47 = lshr i64 %src, 47 1052 %src2.trunc = trunc i64 %src2 to i32 1053 br i1 undef, label %end, label %if.else 1054 1055if.else: 1056 %and3 = and i32 %src2.trunc, 3 1057 %shl2 = shl nuw nsw i64 %shr47, 2 1058 %shl2.trunc = trunc i64 %shl2 to i32 1059 
%and12 = and i32 %shl2.trunc, 12 1060 %BFISource = or i32 %and3, %and12 ; ...00000ABCD 1061 %BFIRHS = shl nuw nsw i32 %BFISource, 4 ; ...0ABCD0000 1062 %BFI = or i32 %BFIRHS, %BFISource ; ...0ABCDABCD 1063 %BFItrunc = trunc i32 %BFI to i16 1064 store i16 %BFItrunc, ptr %ptr, align 4 1065 br label %end 1066 1067end: 1068 ret void 1069} 1070