; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+bulk-memory | FileCheck %s --check-prefixes CHECK,BULK-MEM
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=-bulk-memory | FileCheck %s --check-prefixes CHECK,NO-BULK-MEM

; Test that basic bulk memory codegen works correctly

target triple = "wasm32-unknown-unknown"

declare void @llvm.memcpy.p0.p0.i8(ptr, ptr, i8, i1)
declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)

declare void @llvm.memmove.p0.p0.i8(ptr, ptr, i8, i1)
declare void @llvm.memmove.p0.p0.i32(ptr, ptr, i32, i1)

declare void @llvm.memset.p0.i8(ptr, i8, i8, i1)
declare void @llvm.memset.p0.i32(ptr, i8, i32, i1)

; Variable-length operations with an i8 length. The length argument is
; zero-extended, and with bulk memory enabled the expected code branches
; around the bulk instruction when the length is zero. With bulk memory
; disabled, the only requirement is that no bulk instruction is emitted.

; CHECK-LABEL: memcpy_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.eqz $push0=, $2
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memcpy_i8(ptr %dest, ptr %src, i8 zeroext %len) {
  call void @llvm.memcpy.p0.p0.i8(ptr %dest, ptr %src, i8 %len, i1 0)
  ret void
}

; memmove is expected to use the same memory.copy sequence as memcpy.

; CHECK-LABEL: memmove_i8:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.eqz $push0=, $2
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memmove_i8(ptr %dest, ptr %src, i8 zeroext %len) {
  call void @llvm.memmove.p0.p0.i8(ptr %dest, ptr %src, i8 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i8:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i8 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.eqz $push0=, $2
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memset_i8(ptr %dest, i8 %val, i8 zeroext %len) {
  call void @llvm.memset.p0.i8(ptr %dest, i8 %val, i8 %len, i1 0)
  ret void
}

; The same three operations with an i32 length; the expected sequences are
; identical to the i8 variants.

; CHECK-LABEL: memcpy_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.eqz $push0=, $2
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memcpy_i32(ptr %dest, ptr %src, i32 %len) {
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 %len, i1 0)
  ret void
}

; CHECK-LABEL: memmove_i32:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.eqz $push0=, $2
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $2
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memmove_i32(ptr %dest, ptr %src, i32 %len) {
  call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 %len, i1 0)
  ret void
}

; CHECK-LABEL: memset_i32:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_i32 (i32, i32, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.eqz $push0=, $2
; BULK-MEM-NEXT: br_if 0, $pop0
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $2
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memset_i32(ptr %dest, i8 %val, i32 %len) {
  call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 %len, i1 0)
  ret void
}

; A constant length of 1 is expected to become a single byte load/store
; pair in both run configurations, so the copy and move variants are
; verified with the common prefix.

; CHECK-LABEL: memcpy_1:
; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memcpy_1(ptr %dest, ptr %src) {
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 1, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1:
; CHECK-NEXT: .functype memmove_1 (i32, i32) -> ()
; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
; CHECK-NEXT: return
define void @memmove_1(ptr %dest, ptr %src) {
  call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 1, i1 0)
  ret void
}

; With bulk memory enabled, a constant-length-1 memset is expected to be a
; single byte store rather than a memory.fill.

; CHECK-LABEL: memset_1:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1 (i32, i32) -> ()
; BULK-MEM-NEXT: i32.store8 0($0), $1
; BULK-MEM-NEXT: return
define void @memset_1(ptr %dest, i8 %val) {
  call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 1, i1 0)
  ret void
}

; A constant length of 1024 is expected to use the bulk instruction, still
; preceded by the zero-length test (here applied to the constant itself).

; CHECK-LABEL: memcpy_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]]
; BULK-MEM-NEXT: br_if 0, $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memcpy_1024(ptr %dest, ptr %src) {
  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0)
  ret void
}

; CHECK-LABEL: memmove_1024:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]]
; BULK-MEM-NEXT: br_if 0, $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memmove_1024(ptr %dest, ptr %src) {
  call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0)
  ret void
}

; CHECK-LABEL: memset_1024:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> ()
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]]
; BULK-MEM-NEXT: br_if 0, $pop[[L1]]
; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024
; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L2]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memset_1024(ptr %dest, i8 %val) {
  call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 1024, i1 0)
  ret void
}

; The following tests check that frame index elimination works for
; bulk memory instructions. The stack pointer is bumped by 112 instead
; of 100 because the stack pointer in WebAssembly is currently always
; 16-byte aligned, even in leaf functions, although it is not written
; back to the global in this case.

; TODO: Change TransientStackAlignment to 1 to avoid this extra
; arithmetic. This will require forcing the use of StackAlignment in
; PrologEpilogEmitter.cpp when
; WebAssemblyFrameLowering::needsSPWriteback would be true.

; Source operand is a 100-byte stack buffer. With bulk memory enabled, the
; expected address passed to memory.copy is the adjusted stack pointer
; plus 12.

; CHECK-LABEL: memcpy_alloca_src:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100
; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]]
; BULK-MEM-NEXT: br_if 0, $pop[[L4]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L6]], $pop[[L7]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memcpy_alloca_src(ptr %dst) {
  %a = alloca [100 x i8]
  call void @llvm.memcpy.p0.p0.i32(ptr %dst, ptr %a, i32 100, i1 false)
  ret void
}

; Destination operand is a 100-byte stack buffer; the computed frame
; address appears as the first address operand of memory.copy.

; CHECK-LABEL: memcpy_alloca_dst:
; NO-BULK-MEM-NOT: memory.copy
; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100
; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]]
; BULK-MEM-NEXT: br_if 0, $pop[[L4]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L6]], $0, $pop[[L7]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memcpy_alloca_dst(ptr %src) {
  %a = alloca [100 x i8]
  call void @llvm.memcpy.p0.p0.i32(ptr %a, ptr %src, i32 100, i1 false)
  ret void
}

; Fill a 100-byte stack buffer. The local holding the adjusted stack
; pointer is captured as a pattern variable, matching the two memcpy
; tests above, instead of being pinned to a specific register number.

; CHECK-LABEL: memset_alloca:
; NO-BULK-MEM-NOT: memory.fill
; BULK-MEM-NEXT: .functype memset_alloca (i32) -> ()
; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer
; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112
; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]]
; BULK-MEM-NEXT: block
; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100
; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]]
; BULK-MEM-NEXT: br_if 0, $pop[[L4]]
; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12
; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]]
; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100
; BULK-MEM-NEXT: memory.fill 0, $pop[[L6]], $0, $pop[[L7]]
; BULK-MEM-NEXT: .LBB{{.*}}:
; BULK-MEM-NEXT: end_block
; BULK-MEM-NEXT: return
define void @memset_alloca(i8 %val) {
  %a = alloca [100 x i8]
  call void @llvm.memset.p0.i32(ptr %a, i8 %val, i32 100, i1 false)
  ret void
}