; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=ARM %s
; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=THUMB %s
; RUN: llc -mtriple=thumb-eabi -arm-atomic-cfg-tidy=0 -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck -check-prefix=T2 %s
; RUN: llc -mtriple=thumbv8-eabi -arm-atomic-cfg-tidy=0 %s -o - | FileCheck -check-prefix=V8 %s

; FIXME: The -mtriple=thumb test doesn't change if -disable-peephole is specified.

%struct.Foo = type { ptr }

; @foo walks pointer-sized slots in a loop, masks each loaded value with 3 and
; dispatches on the masked value: first an equality test against 0, then a
; switch over 1, 3 and 2. In all four configurations the expected output has
; the flag-setting "ands" followed directly by "beq", i.e. no separate compare
; against zero is emitted for the first branch.
define ptr @foo(ptr %this, i32 %acc) nounwind readonly align 2 {
; ARM-LABEL: foo:
; ARM:       @ %bb.0: @ %entry
; ARM-NEXT:    add r2, r0, #4
; ARM-NEXT:    mov r12, #1
; ARM-NEXT:    b .LBB0_3
; ARM-NEXT:  .LBB0_1: @ %tailrecurse.switch
; ARM-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; ARM-NEXT:    cmp r3, #1
; ARM-NEXT:    movne pc, lr
; ARM-NEXT:  .LBB0_2: @ %sw.bb
; ARM-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; ARM-NEXT:    orr r1, r3, r1, lsl #1
; ARM-NEXT:    add r2, r2, #4
; ARM-NEXT:    add r12, r12, #1
; ARM-NEXT:  .LBB0_3: @ %tailrecurse
; ARM-NEXT:    @ =>This Inner Loop Header: Depth=1
; ARM-NEXT:    ldr r3, [r2, #-4]
; ARM-NEXT:    ands r3, r3, #3
; ARM-NEXT:    beq .LBB0_2
; ARM-NEXT:  @ %bb.4: @ %tailrecurse.switch
; ARM-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; ARM-NEXT:    cmp r3, #3
; ARM-NEXT:    moveq r0, r2
; ARM-NEXT:    moveq pc, lr
; ARM-NEXT:  .LBB0_5: @ %tailrecurse.switch
; ARM-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; ARM-NEXT:    cmp r3, #2
; ARM-NEXT:    bne .LBB0_1
; ARM-NEXT:  @ %bb.6: @ %sw.bb8
; ARM-NEXT:    add r1, r1, r12
; ARM-NEXT:    add r0, r0, r1, lsl #2
; ARM-NEXT:    mov pc, lr
;
; THUMB-LABEL: foo:
; THUMB:       @ %bb.0: @ %entry
; THUMB-NEXT:    .save {r4, r5, r7, lr}
; THUMB-NEXT:    push {r4, r5, r7, lr}
; THUMB-NEXT:    movs r2, #1
; THUMB-NEXT:    movs r3, r0
; THUMB-NEXT:  .LBB0_1: @ %tailrecurse
; THUMB-NEXT:    @ =>This Inner Loop Header: Depth=1
; THUMB-NEXT:    ldr r5, [r3]
; THUMB-NEXT:    movs r4, #3
; THUMB-NEXT:    ands r4, r5
; THUMB-NEXT:    beq .LBB0_5
; THUMB-NEXT:  @ %bb.2: @ %tailrecurse.switch
; THUMB-NEXT:    @ in Loop: Header=BB0_1 Depth=1
; THUMB-NEXT:    cmp r4, #3
; THUMB-NEXT:    beq .LBB0_6
; THUMB-NEXT:  @ %bb.3: @ %tailrecurse.switch
; THUMB-NEXT:    @ in Loop: Header=BB0_1 Depth=1
; THUMB-NEXT:    cmp r4, #2
; THUMB-NEXT:    beq .LBB0_7
; THUMB-NEXT:  @ %bb.4: @ %tailrecurse.switch
; THUMB-NEXT:    @ in Loop: Header=BB0_1 Depth=1
; THUMB-NEXT:    cmp r4, #1
; THUMB-NEXT:    bne .LBB0_9
; THUMB-NEXT:  .LBB0_5: @ %sw.bb
; THUMB-NEXT:    @ in Loop: Header=BB0_1 Depth=1
; THUMB-NEXT:    lsls r1, r1, #1
; THUMB-NEXT:    orrs r4, r1
; THUMB-NEXT:    adds r3, r3, #4
; THUMB-NEXT:    adds r2, r2, #1
; THUMB-NEXT:    movs r1, r4
; THUMB-NEXT:    b .LBB0_1
; THUMB-NEXT:  .LBB0_6: @ %sw.bb6
; THUMB-NEXT:    adds r0, r3, #4
; THUMB-NEXT:    b .LBB0_8
; THUMB-NEXT:  .LBB0_7: @ %sw.bb8
; THUMB-NEXT:    adds r1, r1, r2
; THUMB-NEXT:    lsls r1, r1, #2
; THUMB-NEXT:    adds r0, r0, r1
; THUMB-NEXT:  .LBB0_8: @ %sw.bb6
; THUMB-NEXT:    pop {r4, r5, r7}
; THUMB-NEXT:    pop {r1}
; THUMB-NEXT:    bx r1
; THUMB-NEXT:  .LBB0_9: @ %sw.epilog
; THUMB-NEXT:    pop {r4, r5, r7}
; THUMB-NEXT:    pop {r0}
; THUMB-NEXT:    bx r0
;
; T2-LABEL: foo:
; T2:       @ %bb.0: @ %entry
; T2-NEXT:    adds r2, r0, #4
; T2-NEXT:    mov.w r12, #1
; T2-NEXT:    b .LBB0_3
; T2-NEXT:  .LBB0_1: @ %tailrecurse.switch
; T2-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; T2-NEXT:    cmp r3, #1
; T2-NEXT:    it ne
; T2-NEXT:    bxne lr
; T2-NEXT:  .LBB0_2: @ %sw.bb
; T2-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; T2-NEXT:    orr.w r1, r3, r1, lsl #1
; T2-NEXT:    adds r2, #4
; T2-NEXT:    add.w r12, r12, #1
; T2-NEXT:  .LBB0_3: @ %tailrecurse
; T2-NEXT:    @ =>This Inner Loop Header: Depth=1
; T2-NEXT:    ldr r3, [r2, #-4]
; T2-NEXT:    ands r3, r3, #3
; T2-NEXT:    beq .LBB0_2
; T2-NEXT:  @ %bb.4: @ %tailrecurse.switch
; T2-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; T2-NEXT:    cmp r3, #3
; T2-NEXT:    itt eq
; T2-NEXT:    moveq r0, r2
; T2-NEXT:    bxeq lr
; T2-NEXT:  .LBB0_5: @ %tailrecurse.switch
; T2-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; T2-NEXT:    cmp r3, #2
; T2-NEXT:    bne .LBB0_1
; T2-NEXT:  @ %bb.6: @ %sw.bb8
; T2-NEXT:    add r1, r12
; T2-NEXT:    add.w r0, r0, r1, lsl #2
; T2-NEXT:    bx lr
;
; V8-LABEL: foo:
; V8:       @ %bb.0: @ %entry
; V8-NEXT:    adds r2, r0, #4
; V8-NEXT:    mov.w r12, #1
; V8-NEXT:    b .LBB0_3
; V8-NEXT:  .LBB0_1: @ %tailrecurse.switch
; V8-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; V8-NEXT:    cmp r3, #1
; V8-NEXT:    it ne
; V8-NEXT:    bxne lr
; V8-NEXT:  .LBB0_2: @ %sw.bb
; V8-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; V8-NEXT:    orr.w r1, r3, r1, lsl #1
; V8-NEXT:    adds r2, #4
; V8-NEXT:    add.w r12, r12, #1
; V8-NEXT:  .LBB0_3: @ %tailrecurse
; V8-NEXT:    @ =>This Inner Loop Header: Depth=1
; V8-NEXT:    ldr r3, [r2, #-4]
; V8-NEXT:    ands r3, r3, #3
; V8-NEXT:    beq .LBB0_2
; V8-NEXT:  @ %bb.4: @ %tailrecurse.switch
; V8-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; V8-NEXT:    cmp r3, #3
; V8-NEXT:    itt eq
; V8-NEXT:    moveq r0, r2
; V8-NEXT:    bxeq lr
; V8-NEXT:  .LBB0_5: @ %tailrecurse.switch
; V8-NEXT:    @ in Loop: Header=BB0_3 Depth=1
; V8-NEXT:    cmp r3, #2
; V8-NEXT:    bne .LBB0_1
; V8-NEXT:  @ %bb.6: @ %sw.bb8
; V8-NEXT:    add r1, r12
; V8-NEXT:    add.w r0, r0, r1, lsl #2
; V8-NEXT:    bx lr
entry:
  %scevgep = getelementptr %struct.Foo, ptr %this, i32 1
  br label %tailrecurse

tailrecurse:                                      ; preds = %sw.bb, %entry
  %lsr.iv2 = phi ptr [ %scevgep3, %sw.bb ], [ %scevgep, %entry ]
  %lsr.iv = phi i32 [ %lsr.iv.next, %sw.bb ], [ 1, %entry ]
  %acc.tr = phi i32 [ %or, %sw.bb ], [ %acc, %entry ]
  %scevgep5 = getelementptr ptr, ptr %lsr.iv2, i32 -1
  %tmp2 = load ptr, ptr %scevgep5
  %0 = ptrtoint ptr %tmp2 to i32

  %and = and i32 %0, 3
  %tst = icmp eq i32 %and, 0
  br i1 %tst, label %sw.bb, label %tailrecurse.switch

tailrecurse.switch:                               ; preds = %tailrecurse
  switch i32 %and, label %sw.epilog [
    i32 1, label %sw.bb
    i32 3, label %sw.bb6
    i32 2, label %sw.bb8
  ], !prof !1

sw.bb:                                            ; preds = %tailrecurse.switch, %tailrecurse
  %shl = shl i32 %acc.tr, 1
  %or = or i32 %and, %shl
  %lsr.iv.next = add i32 %lsr.iv, 1
  %scevgep3 = getelementptr %struct.Foo, ptr %lsr.iv2, i32 1
  br label %tailrecurse

sw.bb6:                                           ; preds = %tailrecurse.switch
  ret ptr %lsr.iv2

sw.bb8:                                           ; preds = %tailrecurse.switch
  %tmp1 = add i32 %acc.tr, %lsr.iv
  %add.ptr11 = getelementptr inbounds %struct.Foo, ptr %this, i32 %tmp1
  ret ptr %add.ptr11

sw.epilog:                                        ; preds = %tailrecurse.switch
  ret ptr undef
}

; Another test that exercises the AND/TST peephole optimization and also
; generates a predicated ANDS instruction. Check that the predicate is printed
; after the "S" modifier on the instruction.
%struct.S = type { ptr, [1 x i8] }

; @bar loads the byte at field 1 of %struct.S from %x and from %y and masks
; each with 112. The second load and mask are only reached when the first
; masked value is non-zero, which is what produces the predicated
; "ldrbne"/"andsne" pair expected in the ARM-mode, Thumb-2 and v8 output.
define internal zeroext i8 @bar(ptr %x, ptr nocapture %y) nounwind readonly {
; ARM-LABEL: bar:
; ARM:       @ %bb.0: @ %entry
; ARM-NEXT:    ldrb r2, [r0, #4]
; ARM-NEXT:    ands r2, r2, #112
; ARM-NEXT:    ldrbne r1, [r1, #4]
; ARM-NEXT:    andsne r1, r1, #112
; ARM-NEXT:    beq .LBB1_2
; ARM-NEXT:  @ %bb.1: @ %bb2
; ARM-NEXT:    cmp r2, #16
; ARM-NEXT:    cmpne r1, #16
; ARM-NEXT:    andeq r0, r0, #255
; ARM-NEXT:    moveq pc, lr
; ARM-NEXT:  .LBB1_2: @ %return
; ARM-NEXT:    mov r0, #1
; ARM-NEXT:    mov pc, lr
;
; THUMB-LABEL: bar:
; THUMB:       @ %bb.0: @ %entry
; THUMB-NEXT:    ldrb r2, [r0, #4]
; THUMB-NEXT:    movs r3, #112
; THUMB-NEXT:    ands r2, r3
; THUMB-NEXT:    beq .LBB1_4
; THUMB-NEXT:  @ %bb.1: @ %bb
; THUMB-NEXT:    ldrb r1, [r1, #4]
; THUMB-NEXT:    ands r1, r3
; THUMB-NEXT:    beq .LBB1_4
; THUMB-NEXT:  @ %bb.2: @ %bb2
; THUMB-NEXT:    cmp r2, #16
; THUMB-NEXT:    beq .LBB1_5
; THUMB-NEXT:  @ %bb.3: @ %bb2
; THUMB-NEXT:    cmp r1, #16
; THUMB-NEXT:    beq .LBB1_5
; THUMB-NEXT:  .LBB1_4: @ %return
; THUMB-NEXT:    movs r0, #1
; THUMB-NEXT:    bx lr
; THUMB-NEXT:  .LBB1_5: @ %bb4
; THUMB-NEXT:    movs r1, #255
; THUMB-NEXT:    ands r0, r1
; THUMB-NEXT:    bx lr
;
; T2-LABEL: bar:
; T2:       @ %bb.0: @ %entry
; T2-NEXT:    ldrb r2, [r0, #4]
; T2-NEXT:    ands r2, r2, #112
; T2-NEXT:    itt ne
; T2-NEXT:    ldrbne r1, [r1, #4]
; T2-NEXT:    andsne r1, r1, #112
; T2-NEXT:    beq .LBB1_2
; T2-NEXT:  @ %bb.1: @ %bb2
; T2-NEXT:    cmp r2, #16
; T2-NEXT:    itee ne
; T2-NEXT:    cmpne r1, #16
; T2-NEXT:    uxtbeq r0, r0
; T2-NEXT:    bxeq lr
; T2-NEXT:  .LBB1_2: @ %return
; T2-NEXT:    movs r0, #1
; T2-NEXT:    bx lr
;
; V8-LABEL: bar:
; V8:       @ %bb.0: @ %entry
; V8-NEXT:    ldrb r2, [r0, #4]
; V8-NEXT:    ands r2, r2, #112
; V8-NEXT:    itt ne
; V8-NEXT:    ldrbne r1, [r1, #4]
; V8-NEXT:    andsne r1, r1, #112
; V8-NEXT:    beq .LBB1_2
; V8-NEXT:  @ %bb.1: @ %bb2
; V8-NEXT:    cmp r2, #16
; V8-NEXT:    itee ne
; V8-NEXT:    cmpne r1, #16
; V8-NEXT:    uxtbeq r0, r0
; V8-NEXT:    bxeq lr
; V8-NEXT:  .LBB1_2: @ %return
; V8-NEXT:    movs r0, #1
; V8-NEXT:    bx lr
entry:
  %0 = getelementptr inbounds %struct.S, ptr %x, i32 0, i32 1, i32 0
  %1 = load i8, ptr %0, align 1
  %2 = zext i8 %1 to i32
  %3 = and i32 %2, 112
  %4 = icmp eq i32 %3, 0
  br i1 %4, label %return, label %bb

bb:                                               ; preds = %entry
  %5 = getelementptr inbounds %struct.S, ptr %y, i32 0, i32 1, i32 0
  %6 = load i8, ptr %5, align 1
  %7 = zext i8 %6 to i32
  %8 = and i32 %7, 112
  %9 = icmp eq i32 %8, 0
  br i1 %9, label %return, label %bb2

bb2:                                              ; preds = %bb
  %10 = icmp eq i32 %3, 16
  %11 = icmp eq i32 %8, 16
  %or.cond = or i1 %10, %11
  br i1 %or.cond, label %bb4, label %return

bb4:                                              ; preds = %bb2
  %12 = ptrtoint ptr %x to i32
  %phitmp = trunc i32 %12 to i8
  ret i8 %phitmp

return:                                           ; preds = %bb2, %bb, %entry
  ret i8 1
}


; We were looking through multiple COPY instructions to find an AND we might
; fold into a TST, but in doing so we changed the register being tested allowing
; folding of unrelated tests (in this case, a TST against r1 was eliminated in
; favour of an AND of r0).
; The selected values are derived from %a & 1 while the select condition is
; derived only from %b & 1, so the test of %b (r1) must remain in the output:
; "tst r1, #1" in ARM mode, "lsls r1, r1, #31" in the Thumb configurations.
define i32 @test_tst_assessment(i32 %a, i32 %b) {
; ARM-LABEL: test_tst_assessment:
; ARM:       @ %bb.0:
; ARM-NEXT:    and r0, r0, #1
; ARM-NEXT:    tst r1, #1
; ARM-NEXT:    subne r0, r0, #1
; ARM-NEXT:    mov pc, lr
;
; THUMB-LABEL: test_tst_assessment:
; THUMB:       @ %bb.0:
; THUMB-NEXT:    movs r2, r0
; THUMB-NEXT:    movs r0, #1
; THUMB-NEXT:    ands r0, r2
; THUMB-NEXT:    lsls r1, r1, #31
; THUMB-NEXT:    beq .LBB2_2
; THUMB-NEXT:  @ %bb.1:
; THUMB-NEXT:    subs r0, r0, #1
; THUMB-NEXT:  .LBB2_2:
; THUMB-NEXT:    bx lr
;
; T2-LABEL: test_tst_assessment:
; T2:       @ %bb.0:
; T2-NEXT:    and r0, r0, #1
; T2-NEXT:    lsls r1, r1, #31
; T2-NEXT:    it ne
; T2-NEXT:    subne r0, #1
; T2-NEXT:    bx lr
;
; V8-LABEL: test_tst_assessment:
; V8:       @ %bb.0:
; V8-NEXT:    and r0, r0, #1
; V8-NEXT:    lsls r1, r1, #31
; V8-NEXT:    it ne
; V8-NEXT:    subne r0, #1
; V8-NEXT:    bx lr
  %and1 = and i32 %a, 1
  %sub = sub i32 %and1, 1
  %and2 = and i32 %b, 1
  %cmp = icmp eq i32 %and2, 0
  %sel = select i1 %cmp, i32 %and1, i32 %sub
  ret i32 %sel
}

; Branch weights attached to the switch in @foo via !prof: the first weight
; belongs to the default destination, the remaining three to the cases in
; the order they are listed.
!1 = !{!"branch_weights", i32 1, i32 1, i32 3, i32 2 }