; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV32I
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV64I

; Check that memory accesses to array elements with large offsets have those
; offsets split into a base offset, plus a smaller offset that is folded into
; the memory operation. We should also only compute that base offset once,
; since it can be shared for all memory operations in this test.
;
; Element 20000 of an i32 array lives at byte offset 80000, which equals
; (20 << 12) - 1920. The expected code builds that constant once with
; lui 20 followed by addi/addiw -1920, adds it to both pointers, and reaches
; element 20001 through a folded store offset of 4.
define void @test1(ptr %sp, ptr %t, i32 %n) {
; RV32I-LABEL: test1:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a2, 20
; RV32I-NEXT:    lw a0, 0(a0)
; RV32I-NEXT:    li a3, 2
; RV32I-NEXT:    addi a2, a2, -1920
; RV32I-NEXT:    add a1, a1, a2
; RV32I-NEXT:    add a0, a0, a2
; RV32I-NEXT:    li a2, 1
; RV32I-NEXT:    sw a3, 0(a0)
; RV32I-NEXT:    sw a2, 4(a0)
; RV32I-NEXT:    sw a2, 0(a1)
; RV32I-NEXT:    sw a3, 4(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test1:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a2, 20
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    li a3, 2
; RV64I-NEXT:    addiw a2, a2, -1920
; RV64I-NEXT:    add a1, a1, a2
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    li a2, 1
; RV64I-NEXT:    sw a3, 0(a0)
; RV64I-NEXT:    sw a2, 4(a0)
; RV64I-NEXT:    sw a2, 0(a1)
; RV64I-NEXT:    sw a3, 4(a1)
; RV64I-NEXT:    ret
entry:
  ; %s is loaded from memory while %t is used directly, so two distinct base
  ; pointers share the same large constant offset.
  %s = load ptr, ptr %sp
  %gep0 = getelementptr [65536 x i32], ptr %s, i64 0, i32 20000
  %gep1 = getelementptr [65536 x i32], ptr %s, i64 0, i32 20001
  %gep2 = getelementptr [65536 x i32], ptr %t, i64 0, i32 20000
  %gep3 = getelementptr [65536 x i32], ptr %t, i64 0, i32 20001
  store i32 2, ptr %gep0
  store i32 1, ptr %gep1
  store i32 1, ptr %gep2
  store i32 2, ptr %gep3
  ret void
}

; Ditto. Check it when the GEPs are not in the entry block.
define void @test2(ptr %sp, ptr %t, i32 %n) {
; RV32I-LABEL: test2:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    li a3, 0
; RV32I-NEXT:    lw a0, 0(a0)
; RV32I-NEXT:    lui a4, 20
; RV32I-NEXT:    addi a4, a4, -1920
; RV32I-NEXT:    add a1, a1, a4
; RV32I-NEXT:    add a0, a0, a4
; RV32I-NEXT:    blez a2, .LBB1_2
; RV32I-NEXT:  .LBB1_1: # %while_body
; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32I-NEXT:    addi a4, a3, 1
; RV32I-NEXT:    sw a4, 0(a0)
; RV32I-NEXT:    sw a3, 4(a0)
; RV32I-NEXT:    sw a4, 0(a1)
; RV32I-NEXT:    sw a3, 4(a1)
; RV32I-NEXT:    mv a3, a4
; RV32I-NEXT:    blt a4, a2, .LBB1_1
; RV32I-NEXT:  .LBB1_2: # %while_end
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test2:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    li a3, 0
; RV64I-NEXT:    ld a0, 0(a0)
; RV64I-NEXT:    lui a4, 20
; RV64I-NEXT:    addiw a4, a4, -1920
; RV64I-NEXT:    add a1, a1, a4
; RV64I-NEXT:    add a0, a0, a4
; RV64I-NEXT:    sext.w a2, a2
; RV64I-NEXT:    blez a2, .LBB1_2
; RV64I-NEXT:  .LBB1_1: # %while_body
; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64I-NEXT:    addiw a4, a3, 1
; RV64I-NEXT:    sw a4, 0(a0)
; RV64I-NEXT:    sw a3, 4(a0)
; RV64I-NEXT:    sw a4, 0(a1)
; RV64I-NEXT:    sw a3, 4(a1)
; RV64I-NEXT:    mv a3, a4
; RV64I-NEXT:    blt a4, a2, .LBB1_1
; RV64I-NEXT:  .LBB1_2: # %while_end
; RV64I-NEXT:    ret
entry:
  %s = load ptr, ptr %sp
  br label %while_cond
while_cond:
  ; The GEPs sit in the loop header rather than the entry block. The expected
  ; code above still forms both base addresses once, ahead of the loop, and the
  ; loop body only uses the small folded offsets 0 and 4.
  %phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
  %gep0 = getelementptr [65536 x i32], ptr %s, i64 0, i32 20000
  %gep1 = getelementptr [65536 x i32], ptr %s, i64 0, i32 20001
  %gep2 = getelementptr [65536 x i32], ptr %t, i64 0, i32 20000
  %gep3 = getelementptr [65536 x i32], ptr %t, i64 0, i32 20001
  %cmp = icmp slt i32 %phi, %n
  br i1 %cmp, label %while_body, label %while_end
while_body:
  %i = add i32 %phi, 1
  ; %j has no users in this function; it is kept so the input stays identical
  ; to what the autogenerated assertions were produced from.
  %j = add i32 %phi, 2
  store i32 %i, ptr %gep0
  store i32 %phi, ptr %gep1
  store i32 %i, ptr %gep2
  store i32 %phi, ptr %gep3
  br label %while_cond
while_end:
  ret void
}

; GEPs have been manually split so the base GEP does not get used by any memory
; instructions. Make sure we use an offset and common base for each of the
; stores.
define void @test3(ptr %t) {
; RV32I-LABEL: test3:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, 20
; RV32I-NEXT:    li a2, 2
; RV32I-NEXT:    addi a1, a1, -1920
; RV32I-NEXT:    add a0, a0, a1
; RV32I-NEXT:    li a1, 3
; RV32I-NEXT:    sw a2, 4(a0)
; RV32I-NEXT:    sw a1, 8(a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test3:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a1, 20
; RV64I-NEXT:    li a2, 2
; RV64I-NEXT:    addiw a1, a1, -1920
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    li a1, 3
; RV64I-NEXT:    sw a2, 4(a0)
; RV64I-NEXT:    sw a1, 8(a0)
; RV64I-NEXT:    ret
entry:
  ; %splitgep (t + 80000) is only ever used as the base of further GEPs, never
  ; directly by a load or store.
  %splitgep = getelementptr i8, ptr %t, i64 80000
  %0 = getelementptr i8, ptr %splitgep, i64 4
  %1 = getelementptr i8, ptr %splitgep, i64 8
  store i32 2, ptr %0, align 4
  store i32 3, ptr %1, align 4
  ret void
}

; Test from PR62734.
;
; The byte offsets 2048..2051 all lie just above 2047, the largest value a
; 12-bit signed immediate can hold. The expected code adds 2047 to the pointer
; once and then uses store offsets 1..4 from that shared base.
define void @test4(ptr %dest) {
; RV32I-LABEL: test4:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi a0, a0, 2047
; RV32I-NEXT:    li a1, 1
; RV32I-NEXT:    sb a1, 1(a0)
; RV32I-NEXT:    sb a1, 2(a0)
; RV32I-NEXT:    sb a1, 3(a0)
; RV32I-NEXT:    sb a1, 4(a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: test4:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi a0, a0, 2047
; RV64I-NEXT:    li a1, 1
; RV64I-NEXT:    sb a1, 1(a0)
; RV64I-NEXT:    sb a1, 2(a0)
; RV64I-NEXT:    sb a1, 3(a0)
; RV64I-NEXT:    sb a1, 4(a0)
; RV64I-NEXT:    ret
  %p1 = getelementptr i8, ptr %dest, i32 2048
  store i8 1, ptr %p1
  %p2 = getelementptr i8, ptr %dest, i32 2049
  store i8 1, ptr %p2
  %p3 = getelementptr i8, ptr %dest, i32 2050
  store i8 1, ptr %p3
  %p4 = getelementptr i8, ptr %dest, i32 2051
  store i8 1, ptr %p4
  ret void
}