; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM

; Test that basic bulk memory codegen works correctly
;
; The file is compiled twice: once with the bulk-memory feature enabled and
; once with it disabled. Directives using the shared prefix apply to both
; invocations; directives using the BULK-MEM prefix apply only when the
; feature is enabled, and directives using the NO-BULK-MEM prefix apply only
; when it is disabled. In the disabled configuration most tests only verify
; that no bulk memory instruction is emitted.

target triple = "wasm64-unknown-unknown"

declare void @llvm.memcpy.p0.p0.i8(ptr, ptr, i8, i1)
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

declare void @llvm.memmove.p0.p0.i8(ptr, ptr, i8, i1)
declare void @llvm.memmove.p0.p0.i64(ptr, ptr, i64, i1)

declare void @llvm.memset.p0.i8(ptr, i8, i8, i1)
declare void @llvm.memset.p0.i64(ptr, i8, i64, i1)

; Variable i8 lengths: with bulk memory enabled, the length is zero-extended
; to i64, tested against zero, and the bulk instruction is skipped when the
; length is zero.

; CHECK-LABEL: memcpy_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i8 (i64, i64, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.extend_i32_u $push[[L0:[0-9]+]]=, $2
; BULK-MEM-NEXT: local.tee $push[[L1:[0-9]+]]=, $3=, $pop[[L0]]
; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]]
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $3
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memcpy_i8(ptr %dest, ptr %src, i8 zeroext %len) {
  call void @llvm.memcpy.p0.p0.i8(ptr %dest, ptr %src, i8 %len, i1 0)
  ret void
}

; CHECK-LABEL: memmove_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i8 (i64, i64, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.extend_i32_u $push[[L0:[0-9]+]]=, $2
; BULK-MEM-NEXT: local.tee $push[[L1:[0-9]+]]=, $3=, $pop[[L0]]
; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]]
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $3
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memmove_i8(ptr %dest, ptr %src, i8 zeroext %len) {
  call void @llvm.memmove.p0.p0.i8(ptr %dest, ptr %src, i8 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i8:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i8 (i64, i32, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.extend_i32_u $push[[L0:[0-9]+]]=, $2
; BULK-MEM-NEXT: local.tee $push[[L1:[0-9]+]]=, $3=, $pop[[L0]]
; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]]
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $3
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memset_i8(ptr %dest, i8 %val, i8 zeroext %len) {
  call void @llvm.memset.p0.i8(ptr %dest, i8 %val, i8 %len, i1 0)
  ret void
}

; Variable i64 lengths. Note that despite the `_i32` suffix in their names,
; the next three functions take an i64 length and call the i64 variants of
; the intrinsics, so no extension of the length is expected.

; CHECK-LABEL: memcpy_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i32 (i64, i64, i64) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.eqz $push0=, $2
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memcpy_i32(ptr %dest, ptr %src, i64 %len) {
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 %len, i1 0)
  ret void
}

; CHECK-LABEL: memmove_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i32 (i64, i64, i64) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.eqz $push0=, $2
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memmove_i32(ptr %dest, ptr %src, i64 %len) {
  call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i32:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i32 (i64, i32, i64) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.eqz $push0=, $2
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memset_i32(ptr %dest, i8 %val, i64 %len) {
  call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 %len, i1 0)
  ret void
}

; Constant length of one byte: the expected output is a single byte load
; and/or store with no bulk memory instruction, so these tests are verified
; identically in both configurations.

; CHECK-LABEL: memcpy_1:
; CHECK-NEXT: .functype memcpy_1 (i64, i64) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memcpy_1(ptr %dest, ptr %src) {
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1:
; CHECK-NEXT: .functype memmove_1 (i64, i64) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memmove_1(ptr %dest, ptr %src) {
  call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 1, i1 0)
  ret void
}

; A one-byte memset is expected to become a single byte store whether or not
; bulk memory is enabled, so it is verified with the shared prefix, mirroring
; memcpy_1 and memmove_1 above. The full body is pinned through `return`,
; which also rules out any memory.fill in either configuration.

; CHECK-LABEL: memset_1:
; CHECK-NEXT: .functype memset_1 (i64, i32) -> ()
; CHECK-NEXT: i32.store8 0($0), $1
; CHECK-NEXT: return
define void @memset_1(ptr %dest, i8 %val) {
  call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 1, i1 0)
  ret void
}

; Constant length of 1024 bytes: with bulk memory enabled, the expected output
; still contains the zero-length test (on the constant) followed by the bulk
; instruction with the constant rematerialized as its length operand.

; CHECK-LABEL: memcpy_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_1024 (i64, i64) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024
; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]]
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memcpy_1024(ptr %dest, ptr %src) {
  call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_1024 (i64, i64) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024
; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]]
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memmove_1024(ptr %dest, ptr %src) {
  call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0)
  ret void
}

; CHECK-LABEL: memset_1024:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1024 (i64, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024
; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]]
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memset_1024(ptr %dest, i8 %val) {
  call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 1024, i1 0)
  ret void
}

; The following tests check that frame index elimination works for
; bulk memory instructions. The stack pointer is bumped by 112 instead
; of 100 because the stack pointer in WebAssembly is currently always
; 16-byte aligned, even in leaf functions, although it is not written
; back to the global in this case. The 100-byte alloca is therefore
; expected at offset 12 from the adjusted stack pointer.

; TODO: Change TransientStackAlignment to 1 to avoid this extra
; arithmetic. This will require forcing the use of StackAlignment in
; PrologEpilogEmitter.cpp when
; WebAssemblyFrameLowering::needsSPWriteback would be true.

; CHECK-LABEL: memcpy_alloca_src:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i64) -> ()
; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 100
; BULK-MEM-NEXT: i64.eqz $push[[L4:[0-9]+]]=, $pop[[L3]]
; BULK-MEM-NEXT: br_if 0, $pop[[L4]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
; BULK-MEM-NEXT: i64.const $push[[L7:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L6]], $pop[[L7]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memcpy_alloca_src(ptr %dst) {
  %a = alloca [100 x i8]
  call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %a, i64 100, i1 false)
  ret void
}

; CHECK-LABEL: memcpy_alloca_dst:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i64) -> ()
; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]]
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 100
; BULK-MEM-NEXT: i64.eqz $push[[L4:[0-9]+]]=, $pop[[L3]]
; BULK-MEM-NEXT: br_if 0, $pop[[L4]]
; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
; BULK-MEM-NEXT: i64.const $push[[L7:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L6]], $0, $pop[[L7]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memcpy_alloca_dst(ptr %src) {
  %a = alloca [100 x i8]
  call void @llvm.memcpy.p0.p0.i64(ptr %a, ptr %src, i64 100, i1 false)
  ret void
}

; CHECK-LABEL: memset_alloca:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112
; BULK-MEM-NEXT: i64.sub $1=, $pop[[L1]], $pop[[L0]]
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i64.const $push[[L2:[0-9]+]]=, 100
; BULK-MEM-NEXT: i64.eqz $push[[L3:[0-9]+]]=, $pop[[L2]]
; BULK-MEM-NEXT: br_if 0, $pop[[L3]]
; BULK-MEM-NEXT: i64.const $push[[L4:[0-9]+]]=, 12
; BULK-MEM-NEXT: i64.add $push[[L5:[0-9]+]]=, $1, $pop[[L4]]
; BULK-MEM-NEXT: i64.const $push[[L6:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L5]], $0, $pop[[L6]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memset_alloca(i8 %val) {
  %a = alloca [100 x i8]
  call void @llvm.memset.p0.i64(ptr %a, i8 %val, i64 100, i1 false)
  ret void
}