; Test memcpy using MVC.
;
; The first invocation below pipes the generated assembly through FileCheck;
; the second uses -filetype=null, so it only exercises the compile itself
; without inspecting any output.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
; RUN: llc -mtriple=s390x-linux-gnu -filetype=null %s

; Both the i32-length and i64-length forms of the intrinsic are exercised.
declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1) nounwind
declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
; External callee used by the stack-frame tests to keep the allocas live.
declare void @foo(ptr, ptr)

; Test a no-op move, i32 version.
; A zero-length copy should touch neither pointer argument register
; (%r2 = first argument, %r3 = second) before returning.
define void @f1(ptr %dest, ptr %src) {
; CHECK-LABEL: f1:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 false)
  ret void
}

; Test a no-op move, i64 version.
; Same expectation as f1, but with a 64-bit length operand.
define void @f2(ptr %dest, ptr %src) {
; CHECK-LABEL: f2:
; CHECK-NOT: %r2
; CHECK-NOT: %r3
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 false)
  ret void
}

; Test a 1-byte move, i32 version.
; The smallest non-empty copy is expected to be a single MVC of length 1.
define void @f3(ptr %dest, ptr %src) {
; CHECK-LABEL: f3:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 1, i1 false)
  ret void
}

; Test a 1-byte move, i64 version.
; Same expectation as f3, but with a 64-bit length operand.
define void @f4(ptr %dest, ptr %src) {
; CHECK-LABEL: f4:
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 false)
  ret void
}

; Test the upper range of a single MVC, i32 version.
; 256 bytes is the largest length the expected output covers with one MVC.
define void @f5(ptr %dest, ptr %src) {
; CHECK-LABEL: f5:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 256, i1 false)
  ret void
}

; Test the upper range of a single MVC, i64 version.
; Same expectation as f5, but with a 64-bit length operand.
define void @f6(ptr %dest, ptr %src) {
; CHECK-LABEL: f6:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 256, i1 false)
  ret void
}

; Test the first case that needs two MVCs.
; 257 bytes = one full 256-byte MVC followed by a 1-byte MVC at offset 256.
define void @f7(ptr %dest, ptr %src) {
; CHECK-LABEL: f7:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(1,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 257, i1 false)
  ret void
}

; Test the last-but-one case that needs two MVCs.
; 511 bytes = 256 + 255.
define void @f8(ptr %dest, ptr %src) {
; CHECK-LABEL: f8:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(255,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 511, i1 false)
  ret void
}

; Test the last case that needs two MVCs.
; 512 bytes = 256 + 256, i.e. two full-length MVCs.
define void @f9(ptr %dest, ptr %src) {
; CHECK-LABEL: f9:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 512, i1 false)
  ret void
}

; Test an arbitrary value that uses straight-line code.
; 1279 bytes = 4 * 256 + 255: four full MVCs and a 255-byte tail, with both
; displacements advancing by 256 each step.
define void @f10(ptr %dest, ptr %src) {
; CHECK-LABEL: f10:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: mvc 512(256,%r2), 512(%r3)
; CHECK: mvc 768(256,%r2), 768(%r3)
; CHECK: mvc 1024(255,%r2), 1024(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1279, i1 false)
  ret void
}

; ...and again in cases where not all parts are in range of MVC.
; The destination starts at %destbase + 4000 and the source at
; %srcbase + 3500, so the running displacements cross the range that the
; expected output encodes directly in an MVC (the largest direct displacement
; below is 4012; 4256 and 4268 are instead formed with LAY -- consistent with
; a 12-bit unsigned displacement field, 0-4095).
;   - The destination overflows first (4000 + 256 = 4256), so a new
;     destination base is materialised after the first MVC.
;   - The source overflows later (4012 + 256 = 4268), so a new source base is
;     materialised before the fourth MVC.
; The first parameter arrives in %r2 and the second in %r3, which is why the
; destination displacements are applied to %r2 and the source ones to %r3.
define void @f11(ptr %destbase, ptr %srcbase) {
; CHECK-LABEL: f11:
; CHECK: mvc 4000(256,%r2), 3500(%r3)
; CHECK: lay [[NEWDEST:%r[1-5]]], 4256(%r2)
; CHECK: mvc 0(256,[[NEWDEST]]), 3756(%r3)
; CHECK: mvc 256(256,[[NEWDEST]]), 4012(%r3)
; CHECK: lay [[NEWSRC:%r[1-5]]], 4268(%r3)
; CHECK: mvc 512(256,[[NEWDEST]]), 0([[NEWSRC]])
; CHECK: mvc 768(255,[[NEWDEST]]), 256([[NEWSRC]])
; CHECK: br %r14
  %dest = getelementptr i8, ptr %destbase, i64 4000
  %src = getelementptr i8, ptr %srcbase, i64 3500
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1279, i1 false)
  ret void
}

; ...and again with a destination frame base that goes out of range.
; Both pointers are offsets into one 6000-byte stack array. The expected
; displacements (4076 and 2100 from %r15) correspond to the array offsets
; 3900 and 1924 plus 176, i.e. the output assumes the array sits at
; 176(%r15). Only the destination displacement leaves the directly encodable
; range (4076 + 256 = 4332), so only a new destination base is expected; the
; source keeps using %r15 throughout.
; The calls to @foo before and after the copy keep both pointers live and
; bracket the MVC sequence in the output.
define void @f12() {
; CHECK-LABEL: f12:
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc 4076(256,%r15), 2100(%r15)
; CHECK: lay [[NEWDEST:%r[1-5]]], 4332(%r15)
; CHECK: mvc 0(256,[[NEWDEST]]), 2356(%r15)
; CHECK: mvc 256(256,[[NEWDEST]]), 2612(%r15)
; CHECK: mvc 512(256,[[NEWDEST]]), 2868(%r15)
; CHECK: mvc 768(255,[[NEWDEST]]), 3124(%r15)
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [6000 x i8]
  %dest = getelementptr [6000 x i8], ptr %arr, i64 0, i64 3900
  %src = getelementptr [6000 x i8], ptr %arr, i64 0, i64 1924
  call void @foo(ptr %dest, ptr %src)
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1279, i1 false)
  call void @foo(ptr %dest, ptr %src)
  ret void
}

; ...and again with a source frame base that goes out of range.
; Mirror image of the preceding destination-out-of-range frame test: the
; destination is at array offset 24 and the source at 3650 within a 6000-byte
; stack array. The expected displacements (200 and 3826 from %r15) are those
; offsets plus 176. Here it is the source displacement that leaves the
; directly encodable range (4082 + 256 = 4338), so a new source base is
; formed with LAY while the destination keeps using %r15.
define void @f13() {
; CHECK-LABEL: f13:
; CHECK: brasl %r14, foo@PLT
; CHECK: mvc 200(256,%r15), 3826(%r15)
; CHECK: mvc 456(256,%r15), 4082(%r15)
; CHECK: lay [[NEWSRC:%r[1-5]]], 4338(%r15)
; CHECK: mvc 712(256,%r15), 0([[NEWSRC]])
; CHECK: mvc 968(256,%r15), 256([[NEWSRC]])
; CHECK: mvc 1224(255,%r15), 512([[NEWSRC]])
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [6000 x i8]
  %dest = getelementptr [6000 x i8], ptr %arr, i64 0, i64 24
  %src = getelementptr [6000 x i8], ptr %arr, i64 0, i64 3650
  call void @foo(ptr %dest, ptr %src)
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1279, i1 false)
  call void @foo(ptr %dest, ptr %src)
  ret void
}

; Test the last case that is done using straight-line code.
; 1536 bytes = 6 * 256: six full-length MVCs and no loop.
define void @f14(ptr %dest, ptr %src) {
; CHECK-LABEL: f14:
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: mvc 256(256,%r2), 256(%r3)
; CHECK: mvc 512(256,%r2), 512(%r3)
; CHECK: mvc 768(256,%r2), 768(%r3)
; CHECK: mvc 1024(256,%r2), 1024(%r3)
; CHECK: mvc 1280(256,%r2), 1280(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1536, i1 false)
  ret void
}

; Test the first case that is done using a loop.
; 1537 bytes = 6 * 256 + 1. The expected code loads a trip count of 6, then
; on each iteration issues a PFD on the destination 768 bytes ahead, copies
; 256 bytes, and advances both pointers by 256. BRCTG decrements the counter
; and branches back to the loop label. The remaining single byte is copied by
; a 1-byte MVC after the loop.
define void @f15(ptr %dest, ptr %src) {
; CHECK-LABEL: f15:
; CHECK: lghi [[COUNT:%r[0-5]]], 6
; CHECK: [[LABEL:\.L[^:]*]]:
; CHECK: pfd 2, 768(%r2)
; CHECK: mvc 0(256,%r2), 0(%r3)
; CHECK: la %r2, 256(%r2)
; CHECK: la %r3, 256(%r3)
; CHECK: brctg [[COUNT]], [[LABEL]]
; CHECK: mvc 0(1,%r2), 0(%r3)
; CHECK: br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1537, i1 false)
  ret void
}

; ...and again with frame bases, where the base must be loaded into a
; register before the loop.
; The source is the start of a 3200-byte stack array and the destination is
; 1600 bytes into the same array, so the expected loop uses a single base
; register (loaded from 160(%r15)): the source is 0(BASE), the destination is
; 1600(BASE), and the PFD address is 1600 + 768 = 2368(BASE). The trip count
; load and the base load may appear in either order, hence the -DAG checks.
; 1537 bytes = 6 loop iterations of 256 bytes plus a 1-byte MVC after the
; loop.
define void @f16() {
; CHECK-LABEL: f16:
; CHECK: brasl %r14, foo@PLT
; CHECK-DAG: lghi [[COUNT:%r[0-5]]], 6
; CHECK-DAG: la [[BASE:%r[0-5]]], 160(%r15)
; CHECK: [[LABEL:\.L[^:]*]]:
; CHECK: pfd 2, 2368([[BASE]])
; CHECK: mvc 1600(256,[[BASE]]), 0([[BASE]])
; CHECK: la [[BASE]], 256([[BASE]])
; CHECK: brctg [[COUNT]], [[LABEL]]
; CHECK: mvc 1600(1,[[BASE]]), 0([[BASE]])
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
  %arr = alloca [3200 x i8]
  %dest = getelementptr [3200 x i8], ptr %arr, i64 0, i64 1600
  call void @foo(ptr %dest, ptr %arr)
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %arr, i64 1537, i1 false)
  call void @foo(ptr %dest, ptr %arr)
  ret void
}

; Test a variable length loop.
; The length arrives in %r4. Expected sequence:
;   - subtract 1 from the length and return immediately if the result is -1
;     (i.e. the original length was zero);
;   - shift the decremented length right by 8 to get the number of full
;     256-byte blocks, skipping the loop when that is zero;
;   - run the same PFD / MVC / pointer-advance loop as the constant-length
;     cases, counted down with BRCTG;
;   - finish with EXRL, which executes the MVC template at .Ltmp0 using %r4
;     to supply the remaining length.
; These checks match exact instruction order and exact block labels
; (.LBB16_*), so they are sensitive to the position of this function in the
; file and to any change in block numbering.
define void @f17(ptr %dest, ptr %src, i64 %Len) {
; CHECK-LABEL: f17:
; CHECK:       # %bb.0:
; CHECK-NEXT:    aghi %r4, -1
; CHECK-NEXT:    cgibe %r4, -1, 0(%r14)
; CHECK-NEXT:  .LBB16_1:
; CHECK-NEXT:    srlg %r0, %r4, 8
; CHECK-NEXT:    cgije %r0, 0, .LBB16_3
; CHECK-NEXT:  .LBB16_2: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    pfd 2, 768(%r2)
; CHECK-NEXT:    mvc 0(256,%r2), 0(%r3)
; CHECK-NEXT:    la %r2, 256(%r2)
; CHECK-NEXT:    la %r3, 256(%r3)
; CHECK-NEXT:    brctg %r0, .LBB16_2
; CHECK-NEXT:  .LBB16_3:
; CHECK-NEXT:    exrl %r4, .Ltmp0
; CHECK-NEXT:    br %r14
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 %Len, i1 false)
  ret void
}

; The MVC template targeted by the EXRL in f17 is expected after the
; function bodies, immediately following its label.
; CHECK:       .Ltmp0:
; CHECK-NEXT:    mvc 0(1,%r2), 0(%r3)