; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+zba < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+zba < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Checks how llc materializes "global + constant offset" addresses on 32-bit
; and 64-bit RISC-V, with and without Zba. The expectations below show when the
; offset ends up inside the %hi/%lo pair and when a shared base address is
; formed once and the offset is left in the memory-access immediate.

%struct.S = type { [40 x i32], i32, i32, i32, [4100 x i32], i32, i32, i32 }
@s = common dso_local global %struct.S zeroinitializer, align 4
@foo = global [6 x i16] [i16 1, i16 2, i16 3, i16 4, i16 5, i16 0], align 2
@g = global [1048576 x i8] zeroinitializer, align 1
@bar = external global [0 x i8], align 1


; Two stores to different fields of @s (byte offsets 160 and 164): the base
; address is formed once and each offset is the store's immediate.
define dso_local void @multiple_stores() local_unnamed_addr nounwind {
; CHECK-LABEL: multiple_stores:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a0, %hi(s)
; CHECK-NEXT:    addi a0, a0, %lo(s)
; CHECK-NEXT:    li a1, 10
; CHECK-NEXT:    li a2, 20
; CHECK-NEXT:    sw a1, 160(a0)
; CHECK-NEXT:    sw a2, 164(a0)
; CHECK-NEXT:    ret
entry:
  store i32 10, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1), align 4
  store i32 20, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 2), align 4
  ret void
}

; A load and a conditional store to two different fields of @s in different
; basic blocks still share a single base-address computation.
define dso_local void @control_flow_with_mem_access() local_unnamed_addr nounwind {
; CHECK-LABEL: control_flow_with_mem_access:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a0, %hi(s)
; CHECK-NEXT:    addi a0, a0, %lo(s)
; CHECK-NEXT:    lw a1, 164(a0)
; CHECK-NEXT:    blez a1, .LBB1_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    li a1, 10
; CHECK-NEXT:    sw a1, 160(a0)
; CHECK-NEXT:  .LBB1_2: # %if.end
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 2), align 4
  %cmp = icmp sgt i32 %0, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  store i32 10, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1), align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ret void
}

; This test checks that the offset is reconstructed correctly when
; "addi" of the big offset has a negative immediate.
; without peephole this generates:
;   lui a1, %hi(g)
;   addi a1, a1, %lo(g)
;   lui a0, 18           ---> offset
;   addi a0, a0, -160
;   add a0, a0, a1       ---> base + offset.
; (18 << 12) - 160 = 73568, the offset used below.
define ptr @big_offset_neg_addi() nounwind {
; CHECK-LABEL: big_offset_neg_addi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(g+73568)
; CHECK-NEXT:    addi a0, a0, %lo(g+73568)
; CHECK-NEXT:    ret
  ret ptr getelementptr inbounds ([1048576 x i8], ptr @g, i32 0, i32 73568)
}

; This test checks for the case where the offset is only an LUI.
; without peephole this generates:
;   lui a0, %hi(g)
;   addi a0, a0, %lo(g)
;   lui a1, 128          ---> offset
;   add a0, a0, a1       ---> base + offset.
; 128 << 12 = 524288, the offset used below.
define ptr @big_offset_lui_tail() nounwind {
; CHECK-LABEL: big_offset_lui_tail:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(g+524288)
; CHECK-NEXT:    addi a0, a0, %lo(g+524288)
; CHECK-NEXT:    ret
  ret ptr getelementptr inbounds ([1048576 x i8], ptr @g, i32 0, i32 524288)
}

; Same as above, but with a negative offset that is a multiple of 4096.
define ptr @big_offset_neg_lui_tail() {
; CHECK-LABEL: big_offset_neg_lui_tail:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(bar-8192)
; CHECK-NEXT:    addi a0, a0, %lo(bar-8192)
; CHECK-NEXT:    ret
  ret ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i32 -8192)
}

; Single use of a field address whose byte offset (16572) does not fit in a
; 12-bit immediate.
define dso_local ptr @big_offset_one_use() local_unnamed_addr nounwind {
; CHECK-LABEL: big_offset_one_use:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a0, %hi(s+16572)
; CHECK-NEXT:    addi a0, a0, %lo(s+16572)
; CHECK-NEXT:    ret
entry:
  ret ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 5)
}

; Single use of a field address with a small byte offset (160).
define dso_local ptr @small_offset_one_use() local_unnamed_addr nounwind {
; CHECK-LABEL: small_offset_one_use:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a0, %hi(s+160)
; CHECK-NEXT:    addi a0, a0, %lo(s+160)
; CHECK-NEXT:    ret
entry:
  ret ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1)
}

; One load plus two returned field addresses spread over three blocks: the base
; of @s is computed once and the returned pointers are formed with addi.
define dso_local ptr @control_flow_no_mem(i32 %n) local_unnamed_addr nounwind {
; CHECK-LABEL: control_flow_no_mem:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a0, %hi(s)
; CHECK-NEXT:    addi a0, a0, %lo(s)
; CHECK-NEXT:    lw a1, 164(a0)
; CHECK-NEXT:    beqz a1, .LBB7_2
; CHECK-NEXT:  # %bb.1: # %if.end
; CHECK-NEXT:    addi a0, a0, 168
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB7_2: # %if.then
; CHECK-NEXT:    addi a0, a0, 160
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 2), align 4
  %cmp = icmp eq i32 %0, 0
  br i1 %cmp, label %if.then, label %if.end
if.then:                                          ; preds = %entry
  ret ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1)
if.end:                                           ; preds = %entry
  ret ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 3)
}

; A single halfword load from foo+8: the offset is carried by the %hi/%lo pair
; of the load itself, so no separate addi is emitted.
define dso_local i32 @load_half() nounwind {
; RV32-LABEL: load_half:
; RV32:       # %bb.0: # %entry
; RV32-NEXT:    lui a0, %hi(foo+8)
; RV32-NEXT:    lhu a0, %lo(foo+8)(a0)
; RV32-NEXT:    li a1, 140
; RV32-NEXT:    bne a0, a1, .LBB8_2
; RV32-NEXT:  # %bb.1: # %if.end
; RV32-NEXT:    li a0, 0
; RV32-NEXT:    ret
; RV32-NEXT:  .LBB8_2: # %if.then
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    call abort
;
; RV64-LABEL: load_half:
; RV64:       # %bb.0: # %entry
; RV64-NEXT:    lui a0, %hi(foo+8)
; RV64-NEXT:    lhu a0, %lo(foo+8)(a0)
; RV64-NEXT:    li a1, 140
; RV64-NEXT:    bne a0, a1, .LBB8_2
; RV64-NEXT:  # %bb.1: # %if.end
; RV64-NEXT:    li a0, 0
; RV64-NEXT:    ret
; RV64-NEXT:  .LBB8_2: # %if.then
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    call abort
entry:
  %0 = load i16, ptr getelementptr inbounds ([6 x i16], ptr @foo, i32 0, i32 4), align 2
  %cmp = icmp eq i16 %0, 140
  br i1 %cmp, label %if.end, label %if.then

if.then:
  tail call void @abort()
  unreachable

if.end:
  ret i32 0
}

declare void @abort()

; A single store to s+160: the offset is carried by the store's %lo operand.
define dso_local void @one_store() local_unnamed_addr nounwind {
; CHECK-LABEL: one_store:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a0, %hi(s+160)
; CHECK-NEXT:    li a1, 10
; CHECK-NEXT:    sw a1, %lo(s+160)(a0)
; CHECK-NEXT:    ret
entry:
  store i32 10, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1), align 4
  ret void
}

; Negative offset that is not a multiple of 4096.
define ptr @neg_offset() {
; CHECK-LABEL: neg_offset:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(bar-8191)
; CHECK-NEXT:    addi a0, a0, %lo(bar-8191)
; CHECK-NEXT:    ret
  ret ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i32 -8191)
}

; This uses an LUI+ADDI on RV64 that does not produce a simm32. For RV32, we'll
; truncate the offset.
; (-2147485013 truncated to 32 bits is 2147482283.)
define ptr @neg_offset_not_simm32() {
; RV32-LABEL: neg_offset_not_simm32:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(bar+2147482283)
; RV32-NEXT:    addi a0, a0, %lo(bar+2147482283)
; RV32-NEXT:    ret
;
; RV64-LABEL: neg_offset_not_simm32:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(bar)
; RV64-NEXT:    addi a0, a0, %lo(bar)
; RV64-NEXT:    lui a1, 524288
; RV64-NEXT:    addi a1, a1, -1365
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    ret
  ret ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 -2147485013)
}

; Positive offset (3211) that does not fit in a single 12-bit signed immediate.
define ptr @offset_addi_addi() {
; CHECK-LABEL: offset_addi_addi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(bar+3211)
; CHECK-NEXT:    addi a0, a0, %lo(bar+3211)
; CHECK-NEXT:    ret
  ret ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 3211)
}

; Negative offset (-4000) that does not fit in a single 12-bit signed immediate.
define ptr @offset_addi_addi_neg() {
; CHECK-LABEL: offset_addi_addi_neg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(bar-4000)
; CHECK-NEXT:    addi a0, a0, %lo(bar-4000)
; CHECK-NEXT:    ret
  ret ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 -4000)
}

; With Zba the constant 6424 is created with LI+SH2ADD.
define ptr @offset_sh2add() {
; CHECK-LABEL: offset_sh2add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(bar+6424)
; CHECK-NEXT:    addi a0, a0, %lo(bar+6424)
; CHECK-NEXT:    ret
  ret ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 6424)
}

; With Zba the constant 12848 is created with LI+SH3ADD.
define ptr @offset_sh3add() {
; CHECK-LABEL: offset_sh3add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(bar+12848)
; CHECK-NEXT:    addi a0, a0, %lo(bar+12848)
; CHECK-NEXT:    ret
  ret ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 12848)
}

; Load and store of the same field in one block: both accesses reuse the same
; lui and carry the offset in their %lo operand.
define dso_local void @read_modify_write() local_unnamed_addr nounwind {
; CHECK-LABEL: read_modify_write:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a0, %hi(s+160)
; CHECK-NEXT:    lw a1, %lo(s+160)(a0)
; CHECK-NEXT:    addi a1, a1, 10
; CHECK-NEXT:    sw a1, %lo(s+160)(a0)
; CHECK-NEXT:    ret
entry:
  %x = load i32, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1), align 4
  %y = add i32 %x, 10
  store i32 %y, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 1), align 4
  ret void
}

; Load and store of the same field in different blocks.
define dso_local void @rmw_with_control_flow() nounwind {
; CHECK-LABEL: rmw_with_control_flow:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a0, %hi(s+164)
; CHECK-NEXT:    lw a1, %lo(s+164)(a0)
; CHECK-NEXT:    blez a1, .LBB17_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    li a1, 10
; CHECK-NEXT:    sw a1, %lo(s+164)(a0)
; CHECK-NEXT:  .LBB17_2: # %if.end
; CHECK-NEXT:    ret
entry:
  %0 = load i32, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 2), align 4
  %cmp = icmp sgt i32 %0, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  store i32 10, ptr getelementptr inbounds (%struct.S, ptr @s, i32 0, i32 2), align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ret void
}

%struct.foo = type { i32, ptr }

@f = global %struct.foo zeroinitializer, align 8

; Test the case where the store value and base pointer are the same register.
define void @self_store() {
; RV32-LABEL: self_store:
; RV32:       # %bb.0:
; RV32-NEXT:    lui a0, %hi(f)
; RV32-NEXT:    addi a1, a0, %lo(f)
; RV32-NEXT:    sw a1, %lo(f+4)(a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: self_store:
; RV64:       # %bb.0:
; RV64-NEXT:    lui a0, %hi(f)
; RV64-NEXT:    addi a0, a0, %lo(f)
; RV64-NEXT:    sd a0, 8(a0)
; RV64-NEXT:    ret
  store ptr @f, ptr getelementptr inbounds (%struct.foo, ptr @f, i64 0, i32 1), align 8
  ret void
}

; Byte store to bar+3211.
define void @store_addi_addi() {
; CHECK-LABEL: store_addi_addi:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(bar+3211)
; CHECK-NEXT:    li a1, 10
; CHECK-NEXT:    sb a1, %lo(bar+3211)(a0)
; CHECK-NEXT:    ret
  store i8 10, ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 3211)
  ret void
}

; Byte store to bar-4000.
define void @store_addi_addi_neg() {
; CHECK-LABEL: store_addi_addi_neg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(bar-4000)
; CHECK-NEXT:    li a1, 10
; CHECK-NEXT:    sb a1, %lo(bar-4000)(a0)
; CHECK-NEXT:    ret
  store i8 10, ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 -4000)
  ret void
}

; With Zba the constant 6424 is created with LI+SH2ADD.
define void @store_sh2add() {
; CHECK-LABEL: store_sh2add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(bar+6424)
; CHECK-NEXT:    li a1, 10
; CHECK-NEXT:    sb a1, %lo(bar+6424)(a0)
; CHECK-NEXT:    ret
  store i8 10, ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 6424)
  ret void
}

; With Zba the constant 12848 is created with LI+SH3ADD.
define void @store_sh3add() {
; CHECK-LABEL: store_sh3add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(bar+12848)
; CHECK-NEXT:    li a1, 10
; CHECK-NEXT:    sb a1, %lo(bar+12848)(a0)
; CHECK-NEXT:    ret
  store i8 10, ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 12848)
  ret void
}

; Byte load, add and store at bar+3211: both accesses reuse the same lui and
; carry the offset in their %lo operand.
define dso_local void @rmw_addi_addi() nounwind {
; CHECK-LABEL: rmw_addi_addi:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a0, %hi(bar+3211)
; CHECK-NEXT:    lbu a1, %lo(bar+3211)(a0)
; CHECK-NEXT:    addi a1, a1, 10
; CHECK-NEXT:    sb a1, %lo(bar+3211)(a0)
; CHECK-NEXT:    ret
entry:
  %0 = load i8, ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 3211)
  %1 = add i8 %0, 10
  store i8 %1, ptr getelementptr inbounds ([0 x i8], ptr @bar, i32 0, i64 3211)
  br label %if.end

if.end:                                           ; preds = %entry
  ret void
}