; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -relocation-model=pic -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=RV32I,RV32NOFUSION %s
; RUN: llc -mtriple=riscv64 -relocation-model=pic -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefixes=RV64I,RV64NOFUSION %s
; RUN: llc -mtriple=riscv32 -relocation-model=pic -verify-machineinstrs < %s \
; RUN:   -mattr=+auipc-addi-fusion | FileCheck -check-prefixes=RV32I,RV32FUSION %s
; RUN: llc -mtriple=riscv64 -relocation-model=pic -verify-machineinstrs < %s \
; RUN:   -mattr=+auipc-addi-fusion | FileCheck -check-prefixes=RV64I,RV64FUSION %s

; Verifies that MachineLICM can hoist address generation pseudos out of loops.
;
; Every function below has the same shape: a counted loop whose body performs a
; volatile load from a global. The volatile load has to stay inside the loop,
; while the computation of the global's address is loop-invariant. In the
; expected output the address materialization therefore appears in the entry
; block, ahead of the .LBBn_1 loop label.
;
; All four configurations are compiled as PIC. The two extra configurations
; enable the auipc-addi-fusion feature; their output only gets its own check
; prefixes in test_la_tls_gd, the one function where the two variants differ.

@l = protected global i32 0, align 4

; Protected-visibility global: the expected code addresses it PC-relatively
; (%pcrel_hi / %pcrel_lo) with no GOT access. Only the auipc sits ahead of the
; loop label; the %pcrel_lo half is folded into the offset of the load that
; remains inside the loop.
define void @test_lla(i32 signext %n) {
; RV32I-LABEL: test_lla:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:  .Lpcrel_hi0:
; RV32I-NEXT:    auipc a2, %pcrel_hi(l)
; RV32I-NEXT:  .LBB0_1: # %loop
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    lw zero, %pcrel_lo(.Lpcrel_hi0)(a2)
; RV32I-NEXT:    addi a1, a1, 1
; RV32I-NEXT:    blt a1, a0, .LBB0_1
; RV32I-NEXT:  # %bb.2: # %ret
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test_lla:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:  .Lpcrel_hi0:
; RV64I-NEXT:    auipc a2, %pcrel_hi(l)
; RV64I-NEXT:  .LBB0_1: # %loop
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    lw zero, %pcrel_lo(.Lpcrel_hi0)(a2)
; RV64I-NEXT:    addiw a1, a1, 1
; RV64I-NEXT:    blt a1, a0, .LBB0_1
; RV64I-NEXT:  # %bb.2: # %ret
; RV64I-NEXT:    ret
entry:
  br label %loop

loop:
  %i = phi i32 [ %inc, %loop ], [ 0, %entry ]
  %0 = load volatile i32, ptr @l, align 4
  %inc = add nuw nsw i32 %i, 1
  %cmp = icmp slt i32 %inc, %n
  br i1 %cmp, label %loop, label %ret

ret:
  ret void
}

@g = global i32 0, align 4

; Default-visibility global: the expected code fetches its address from the GOT
; (auipc with %got_pcrel_hi followed by a pointer-sized load). Both
; instructions sit ahead of the loop label, so the loop body is left with just
; the volatile load through the already-loaded pointer.
define void @test_la(i32 signext %n) {
; RV32I-LABEL: test_la:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:  .Lpcrel_hi1:
; RV32I-NEXT:    auipc a1, %got_pcrel_hi(g)
; RV32I-NEXT:    lw a1, %pcrel_lo(.Lpcrel_hi1)(a1)
; RV32I-NEXT:    li a2, 0
; RV32I-NEXT:  .LBB1_1: # %loop
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    lw zero, 0(a1)
; RV32I-NEXT:    addi a2, a2, 1
; RV32I-NEXT:    blt a2, a0, .LBB1_1
; RV32I-NEXT:  # %bb.2: # %ret
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test_la:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:  .Lpcrel_hi1:
; RV64I-NEXT:    auipc a1, %got_pcrel_hi(g)
; RV64I-NEXT:    ld a1, %pcrel_lo(.Lpcrel_hi1)(a1)
; RV64I-NEXT:    li a2, 0
; RV64I-NEXT:  .LBB1_1: # %loop
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    lw zero, 0(a1)
; RV64I-NEXT:    addiw a2, a2, 1
; RV64I-NEXT:    blt a2, a0, .LBB1_1
; RV64I-NEXT:  # %bb.2: # %ret
; RV64I-NEXT:    ret
entry:
  br label %loop

loop:
  %i = phi i32 [ %inc, %loop ], [ 0, %entry ]
  %0 = load volatile i32, ptr @g, align 4
  %inc = add nuw nsw i32 %i, 1
  %cmp = icmp slt i32 %inc, %n
  br i1 %cmp, label %loop, label %ret

ret:
  ret void
}

@ie = external thread_local(initialexec) global i32

; Initial-exec TLS variable: the expected code loads the variable's offset via
; %tls_ie_pcrel_hi and adds the thread pointer (tp). The whole auipc / load /
; add sequence sits ahead of the loop label.
define void @test_la_tls_ie(i32 signext %n) {
; RV32I-LABEL: test_la_tls_ie:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:  .Lpcrel_hi2:
; RV32I-NEXT:    auipc a2, %tls_ie_pcrel_hi(ie)
; RV32I-NEXT:    lw a2, %pcrel_lo(.Lpcrel_hi2)(a2)
; RV32I-NEXT:    add a2, a2, tp
; RV32I-NEXT:  .LBB2_1: # %loop
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    lw zero, 0(a2)
; RV32I-NEXT:    addi a1, a1, 1
; RV32I-NEXT:    blt a1, a0, .LBB2_1
; RV32I-NEXT:  # %bb.2: # %ret
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test_la_tls_ie:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:  .Lpcrel_hi2:
; RV64I-NEXT:    auipc a2, %tls_ie_pcrel_hi(ie)
; RV64I-NEXT:    ld a2, %pcrel_lo(.Lpcrel_hi2)(a2)
; RV64I-NEXT:    add a2, a2, tp
; RV64I-NEXT:  .LBB2_1: # %loop
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    lw zero, 0(a2)
; RV64I-NEXT:    addiw a1, a1, 1
; RV64I-NEXT:    blt a1, a0, .LBB2_1
; RV64I-NEXT:  # %bb.2: # %ret
; RV64I-NEXT:    ret
entry:
  br label %loop

loop:
  %i = phi i32 [ %inc, %loop ], [ 0, %entry ]
  %0 = load volatile i32, ptr @ie, align 4
  %inc = add nuw nsw i32 %i, 1
  %cmp = icmp slt i32 %inc, %n
  br i1 %cmp, label %loop, label %ret

ret:
  ret void
}

@gd = external thread_local global i32

; TLS variable with no explicit model: the expected code uses %tls_gd_pcrel_hi
; and calls __tls_get_addr. The auipc/addi pair that builds the call argument
; sits ahead of the loop label, while the call itself stays in the loop and is
; fed from the callee-saved copy in s1.
;
; This is the only function where the fusion and no-fusion outputs differ.
; With auipc-addi-fusion the auipc and the addi both use s1; without it the
; auipc writes a0 and the addi produces s1.
;
; The function is the only one marked nounwind - presumably to keep unwind
; (CFI) directives out of the checked prologue/epilogue; confirm before
; removing the attribute.
define void @test_la_tls_gd(i32 signext %n) nounwind {
; RV32NOFUSION-LABEL: test_la_tls_gd:
; RV32NOFUSION:       # %bb.0: # %entry
; RV32NOFUSION-NEXT:    addi sp, sp, -16
; RV32NOFUSION-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32NOFUSION-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32NOFUSION-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32NOFUSION-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32NOFUSION-NEXT:    mv s0, a0
; RV32NOFUSION-NEXT:    li s2, 0
; RV32NOFUSION-NEXT:  .Lpcrel_hi3:
; RV32NOFUSION-NEXT:    auipc a0, %tls_gd_pcrel_hi(gd)
; RV32NOFUSION-NEXT:    addi s1, a0, %pcrel_lo(.Lpcrel_hi3)
; RV32NOFUSION-NEXT:  .LBB3_1: # %loop
; RV32NOFUSION-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32NOFUSION-NEXT:    mv a0, s1
; RV32NOFUSION-NEXT:    call __tls_get_addr
; RV32NOFUSION-NEXT:    lw zero, 0(a0)
; RV32NOFUSION-NEXT:    addi s2, s2, 1
; RV32NOFUSION-NEXT:    blt s2, s0, .LBB3_1
; RV32NOFUSION-NEXT:  # %bb.2: # %ret
; RV32NOFUSION-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32NOFUSION-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32NOFUSION-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32NOFUSION-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32NOFUSION-NEXT:    addi sp, sp, 16
; RV32NOFUSION-NEXT:    ret
;
; RV64NOFUSION-LABEL: test_la_tls_gd:
; RV64NOFUSION:       # %bb.0: # %entry
; RV64NOFUSION-NEXT:    addi sp, sp, -32
; RV64NOFUSION-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64NOFUSION-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64NOFUSION-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64NOFUSION-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64NOFUSION-NEXT:    mv s0, a0
; RV64NOFUSION-NEXT:    li s2, 0
; RV64NOFUSION-NEXT:  .Lpcrel_hi3:
; RV64NOFUSION-NEXT:    auipc a0, %tls_gd_pcrel_hi(gd)
; RV64NOFUSION-NEXT:    addi s1, a0, %pcrel_lo(.Lpcrel_hi3)
; RV64NOFUSION-NEXT:  .LBB3_1: # %loop
; RV64NOFUSION-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64NOFUSION-NEXT:    mv a0, s1
; RV64NOFUSION-NEXT:    call __tls_get_addr
; RV64NOFUSION-NEXT:    lw zero, 0(a0)
; RV64NOFUSION-NEXT:    addiw s2, s2, 1
; RV64NOFUSION-NEXT:    blt s2, s0, .LBB3_1
; RV64NOFUSION-NEXT:  # %bb.2: # %ret
; RV64NOFUSION-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64NOFUSION-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64NOFUSION-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64NOFUSION-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64NOFUSION-NEXT:    addi sp, sp, 32
; RV64NOFUSION-NEXT:    ret
;
; RV32FUSION-LABEL: test_la_tls_gd:
; RV32FUSION:       # %bb.0: # %entry
; RV32FUSION-NEXT:    addi sp, sp, -16
; RV32FUSION-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32FUSION-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32FUSION-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32FUSION-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32FUSION-NEXT:    mv s0, a0
; RV32FUSION-NEXT:    li s2, 0
; RV32FUSION-NEXT:  .Lpcrel_hi3:
; RV32FUSION-NEXT:    auipc s1, %tls_gd_pcrel_hi(gd)
; RV32FUSION-NEXT:    addi s1, s1, %pcrel_lo(.Lpcrel_hi3)
; RV32FUSION-NEXT:  .LBB3_1: # %loop
; RV32FUSION-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32FUSION-NEXT:    mv a0, s1
; RV32FUSION-NEXT:    call __tls_get_addr
; RV32FUSION-NEXT:    lw zero, 0(a0)
; RV32FUSION-NEXT:    addi s2, s2, 1
; RV32FUSION-NEXT:    blt s2, s0, .LBB3_1
; RV32FUSION-NEXT:  # %bb.2: # %ret
; RV32FUSION-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32FUSION-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32FUSION-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32FUSION-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32FUSION-NEXT:    addi sp, sp, 16
; RV32FUSION-NEXT:    ret
;
; RV64FUSION-LABEL: test_la_tls_gd:
; RV64FUSION:       # %bb.0: # %entry
; RV64FUSION-NEXT:    addi sp, sp, -32
; RV64FUSION-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64FUSION-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64FUSION-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64FUSION-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64FUSION-NEXT:    mv s0, a0
; RV64FUSION-NEXT:    li s2, 0
; RV64FUSION-NEXT:  .Lpcrel_hi3:
; RV64FUSION-NEXT:    auipc s1, %tls_gd_pcrel_hi(gd)
; RV64FUSION-NEXT:    addi s1, s1, %pcrel_lo(.Lpcrel_hi3)
; RV64FUSION-NEXT:  .LBB3_1: # %loop
; RV64FUSION-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64FUSION-NEXT:    mv a0, s1
; RV64FUSION-NEXT:    call __tls_get_addr
; RV64FUSION-NEXT:    lw zero, 0(a0)
; RV64FUSION-NEXT:    addiw s2, s2, 1
; RV64FUSION-NEXT:    blt s2, s0, .LBB3_1
; RV64FUSION-NEXT:  # %bb.2: # %ret
; RV64FUSION-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64FUSION-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64FUSION-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64FUSION-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64FUSION-NEXT:    addi sp, sp, 32
; RV64FUSION-NEXT:    ret
entry:
  br label %loop

loop:
  %i = phi i32 [ %inc, %loop ], [ 0, %entry ]
  %0 = load volatile i32, ptr @gd, align 4
  %inc = add nuw nsw i32 %i, 1
  %cmp = icmp slt i32 %inc, %n
  br i1 %cmp, label %loop, label %ret

ret:
  ret void
}