1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 %s -o - | FileCheck %s 4 5; Testing codegen for memcpy with vector operands for all combinations of the following parameters: 6; destination address space: 0, 1, 3, 5 7; source address space: 0, 1, 3, 4, 5 8; alignment: 1, 2, 8, 16 9; sizes: 16, 31, 32 10 11 12define void @memcpy_p0_p0_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { 13; CHECK-LABEL: memcpy_p0_p0_sz16_align_1_1: 14; CHECK: ; %bb.0: ; %entry 15; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 16; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] 17; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 18; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 19; CHECK-NEXT: s_waitcnt lgkmcnt(0) 20; CHECK-NEXT: s_setpc_b64 s[30:31] 21entry: 22 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false) 23 ret void 24} 25 26define void @memcpy_p0_p0_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { 27; CHECK-LABEL: memcpy_p0_p0_sz31_align_1_1: 28; CHECK: ; %bb.0: ; %entry 29; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 30; CHECK-NEXT: s_clause 0x3 31; CHECK-NEXT: flat_load_ubyte v9, v[2:3] offset:30 32; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28 33; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16 34; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] 35; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) 36; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 37; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3) 38; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 39; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3) 40; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 41; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3) 42; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 43; CHECK-NEXT: s_waitcnt lgkmcnt(0) 44; 
CHECK-NEXT: s_setpc_b64 s[30:31] 45entry: 46 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false) 47 ret void 48} 49 50define void @memcpy_p0_p0_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { 51; CHECK-LABEL: memcpy_p0_p0_sz32_align_1_1: 52; CHECK: ; %bb.0: ; %entry 53; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 54; CHECK-NEXT: s_clause 0x1 55; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 56; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 57; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 58; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 59; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 60; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 61; CHECK-NEXT: s_waitcnt lgkmcnt(0) 62; CHECK-NEXT: s_setpc_b64 s[30:31] 63entry: 64 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false) 65 ret void 66} 67 68define void @memcpy_p0_p0_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 69; CHECK-LABEL: memcpy_p0_p0_sz16_align_2_2: 70; CHECK: ; %bb.0: ; %entry 71; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 72; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] 73; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 74; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 75; CHECK-NEXT: s_waitcnt lgkmcnt(0) 76; CHECK-NEXT: s_setpc_b64 s[30:31] 77entry: 78 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false) 79 ret void 80} 81 82define void @memcpy_p0_p0_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 83; CHECK-LABEL: memcpy_p0_p0_sz31_align_2_2: 84; CHECK: ; %bb.0: ; %entry 85; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 86; CHECK-NEXT: s_clause 0x3 87; CHECK-NEXT: 
flat_load_ubyte v9, v[2:3] offset:30 88; CHECK-NEXT: flat_load_ushort v10, v[2:3] offset:28 89; CHECK-NEXT: flat_load_dwordx3 v[6:8], v[2:3] offset:16 90; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] 91; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) 92; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 93; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(3) 94; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 95; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(3) 96; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 97; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(3) 98; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 99; CHECK-NEXT: s_waitcnt lgkmcnt(0) 100; CHECK-NEXT: s_setpc_b64 s[30:31] 101entry: 102 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false) 103 ret void 104} 105 106define void @memcpy_p0_p0_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 107; CHECK-LABEL: memcpy_p0_p0_sz32_align_2_2: 108; CHECK: ; %bb.0: ; %entry 109; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 110; CHECK-NEXT: s_clause 0x1 111; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 112; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 113; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 114; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 115; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 116; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 117; CHECK-NEXT: s_waitcnt lgkmcnt(0) 118; CHECK-NEXT: s_setpc_b64 s[30:31] 119entry: 120 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false) 121 ret void 122} 123 124define void @memcpy_p0_p0_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { 125; CHECK-LABEL: memcpy_p0_p0_sz16_align_8_8: 126; CHECK: ; %bb.0: ; %entry 127; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 128; CHECK-NEXT: 
flat_load_dwordx4 v[2:5], v[2:3] 129; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 130; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 131; CHECK-NEXT: s_waitcnt lgkmcnt(0) 132; CHECK-NEXT: s_setpc_b64 s[30:31] 133entry: 134 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false) 135 ret void 136} 137 138define void @memcpy_p0_p0_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { 139; CHECK-LABEL: memcpy_p0_p0_sz31_align_8_8: 140; CHECK: ; %bb.0: ; %entry 141; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 142; CHECK-NEXT: s_clause 0x1 143; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15 144; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 145; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 146; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15 147; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 148; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 149; CHECK-NEXT: s_waitcnt lgkmcnt(0) 150; CHECK-NEXT: s_setpc_b64 s[30:31] 151entry: 152 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false) 153 ret void 154} 155 156define void @memcpy_p0_p0_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { 157; CHECK-LABEL: memcpy_p0_p0_sz32_align_8_8: 158; CHECK: ; %bb.0: ; %entry 159; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 160; CHECK-NEXT: s_clause 0x1 161; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 162; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 163; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 164; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 165; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 166; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 167; CHECK-NEXT: s_waitcnt lgkmcnt(0) 168; CHECK-NEXT: s_setpc_b64 s[30:31] 169entry: 170 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) 
noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false) 171 ret void 172} 173 174define void @memcpy_p0_p0_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 175; CHECK-LABEL: memcpy_p0_p0_sz16_align_16_16: 176; CHECK: ; %bb.0: ; %entry 177; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 178; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] 179; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 180; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 181; CHECK-NEXT: s_waitcnt lgkmcnt(0) 182; CHECK-NEXT: s_setpc_b64 s[30:31] 183entry: 184 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false) 185 ret void 186} 187 188define void @memcpy_p0_p0_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 189; CHECK-LABEL: memcpy_p0_p0_sz31_align_16_16: 190; CHECK: ; %bb.0: ; %entry 191; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 192; CHECK-NEXT: s_clause 0x1 193; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15 194; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 195; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 196; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15 197; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 198; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 199; CHECK-NEXT: s_waitcnt lgkmcnt(0) 200; CHECK-NEXT: s_setpc_b64 s[30:31] 201entry: 202 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false) 203 ret void 204} 205 206define void @memcpy_p0_p0_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 207; CHECK-LABEL: memcpy_p0_p0_sz32_align_16_16: 208; CHECK: ; %bb.0: ; %entry 209; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 210; CHECK-NEXT: s_clause 0x1 211; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] 
offset:16 212; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 213; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 214; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 215; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 216; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 217; CHECK-NEXT: s_waitcnt lgkmcnt(0) 218; CHECK-NEXT: s_setpc_b64 s[30:31] 219entry: 220 tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false) 221 ret void 222} 223 224define void @memcpy_p0_p1_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 225; CHECK-LABEL: memcpy_p0_p1_sz16_align_1_1: 226; CHECK: ; %bb.0: ; %entry 227; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 228; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 229; CHECK-NEXT: s_waitcnt vmcnt(0) 230; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 231; CHECK-NEXT: s_waitcnt lgkmcnt(0) 232; CHECK-NEXT: s_setpc_b64 s[30:31] 233entry: 234 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false) 235 ret void 236} 237 238define void @memcpy_p0_p1_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 239; CHECK-LABEL: memcpy_p0_p1_sz31_align_1_1: 240; CHECK: ; %bb.0: ; %entry 241; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 242; CHECK-NEXT: s_clause 0x3 243; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30 244; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28 245; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16 246; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 247; CHECK-NEXT: s_waitcnt vmcnt(3) 248; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 249; CHECK-NEXT: s_waitcnt vmcnt(2) 250; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 251; CHECK-NEXT: s_waitcnt vmcnt(1) 252; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 
253; CHECK-NEXT: s_waitcnt vmcnt(0) 254; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 255; CHECK-NEXT: s_waitcnt lgkmcnt(0) 256; CHECK-NEXT: s_setpc_b64 s[30:31] 257entry: 258 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false) 259 ret void 260} 261 262define void @memcpy_p0_p1_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 263; CHECK-LABEL: memcpy_p0_p1_sz32_align_1_1: 264; CHECK: ; %bb.0: ; %entry 265; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 266; CHECK-NEXT: s_clause 0x1 267; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 268; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 269; CHECK-NEXT: s_waitcnt vmcnt(1) 270; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 271; CHECK-NEXT: s_waitcnt vmcnt(0) 272; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 273; CHECK-NEXT: s_waitcnt lgkmcnt(0) 274; CHECK-NEXT: s_setpc_b64 s[30:31] 275entry: 276 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false) 277 ret void 278} 279 280define void @memcpy_p0_p1_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 281; CHECK-LABEL: memcpy_p0_p1_sz16_align_2_2: 282; CHECK: ; %bb.0: ; %entry 283; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 284; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 285; CHECK-NEXT: s_waitcnt vmcnt(0) 286; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 287; CHECK-NEXT: s_waitcnt lgkmcnt(0) 288; CHECK-NEXT: s_setpc_b64 s[30:31] 289entry: 290 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false) 291 ret void 292} 293 294define void @memcpy_p0_p1_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 295; CHECK-LABEL: 
memcpy_p0_p1_sz31_align_2_2: 296; CHECK: ; %bb.0: ; %entry 297; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 298; CHECK-NEXT: s_clause 0x3 299; CHECK-NEXT: global_load_ubyte v9, v[2:3], off offset:30 300; CHECK-NEXT: global_load_ushort v10, v[2:3], off offset:28 301; CHECK-NEXT: global_load_dwordx3 v[6:8], v[2:3], off offset:16 302; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 303; CHECK-NEXT: s_waitcnt vmcnt(3) 304; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 305; CHECK-NEXT: s_waitcnt vmcnt(2) 306; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 307; CHECK-NEXT: s_waitcnt vmcnt(1) 308; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 309; CHECK-NEXT: s_waitcnt vmcnt(0) 310; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 311; CHECK-NEXT: s_waitcnt lgkmcnt(0) 312; CHECK-NEXT: s_setpc_b64 s[30:31] 313entry: 314 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false) 315 ret void 316} 317 318define void @memcpy_p0_p1_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 319; CHECK-LABEL: memcpy_p0_p1_sz32_align_2_2: 320; CHECK: ; %bb.0: ; %entry 321; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 322; CHECK-NEXT: s_clause 0x1 323; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 324; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 325; CHECK-NEXT: s_waitcnt vmcnt(1) 326; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 327; CHECK-NEXT: s_waitcnt vmcnt(0) 328; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 329; CHECK-NEXT: s_waitcnt lgkmcnt(0) 330; CHECK-NEXT: s_setpc_b64 s[30:31] 331entry: 332 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false) 333 ret void 334} 335 336define void @memcpy_p0_p1_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 337; 
CHECK-LABEL: memcpy_p0_p1_sz16_align_8_8: 338; CHECK: ; %bb.0: ; %entry 339; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 340; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 341; CHECK-NEXT: s_waitcnt vmcnt(0) 342; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 343; CHECK-NEXT: s_waitcnt lgkmcnt(0) 344; CHECK-NEXT: s_setpc_b64 s[30:31] 345entry: 346 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false) 347 ret void 348} 349 350define void @memcpy_p0_p1_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 351; CHECK-LABEL: memcpy_p0_p1_sz31_align_8_8: 352; CHECK: ; %bb.0: ; %entry 353; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 354; CHECK-NEXT: s_clause 0x1 355; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15 356; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 357; CHECK-NEXT: s_waitcnt vmcnt(1) 358; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15 359; CHECK-NEXT: s_waitcnt vmcnt(0) 360; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 361; CHECK-NEXT: s_waitcnt lgkmcnt(0) 362; CHECK-NEXT: s_setpc_b64 s[30:31] 363entry: 364 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false) 365 ret void 366} 367 368define void @memcpy_p0_p1_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 369; CHECK-LABEL: memcpy_p0_p1_sz32_align_8_8: 370; CHECK: ; %bb.0: ; %entry 371; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 372; CHECK-NEXT: s_clause 0x1 373; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 374; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 375; CHECK-NEXT: s_waitcnt vmcnt(1) 376; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 377; CHECK-NEXT: s_waitcnt vmcnt(0) 378; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 379; CHECK-NEXT: 
s_waitcnt lgkmcnt(0) 380; CHECK-NEXT: s_setpc_b64 s[30:31] 381entry: 382 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false) 383 ret void 384} 385 386define void @memcpy_p0_p1_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 387; CHECK-LABEL: memcpy_p0_p1_sz16_align_16_16: 388; CHECK: ; %bb.0: ; %entry 389; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 390; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 391; CHECK-NEXT: s_waitcnt vmcnt(0) 392; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 393; CHECK-NEXT: s_waitcnt lgkmcnt(0) 394; CHECK-NEXT: s_setpc_b64 s[30:31] 395entry: 396 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false) 397 ret void 398} 399 400define void @memcpy_p0_p1_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 401; CHECK-LABEL: memcpy_p0_p1_sz31_align_16_16: 402; CHECK: ; %bb.0: ; %entry 403; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 404; CHECK-NEXT: s_clause 0x1 405; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15 406; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 407; CHECK-NEXT: s_waitcnt vmcnt(1) 408; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:15 409; CHECK-NEXT: s_waitcnt vmcnt(0) 410; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 411; CHECK-NEXT: s_waitcnt lgkmcnt(0) 412; CHECK-NEXT: s_setpc_b64 s[30:31] 413entry: 414 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false) 415 ret void 416} 417 418define void @memcpy_p0_p1_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 419; CHECK-LABEL: memcpy_p0_p1_sz32_align_16_16: 420; CHECK: ; %bb.0: ; %entry 421; CHECK-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 422; CHECK-NEXT: s_clause 0x1 423; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 424; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 425; CHECK-NEXT: s_waitcnt vmcnt(1) 426; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] offset:16 427; CHECK-NEXT: s_waitcnt vmcnt(0) 428; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 429; CHECK-NEXT: s_waitcnt lgkmcnt(0) 430; CHECK-NEXT: s_setpc_b64 s[30:31] 431entry: 432 tail call void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false) 433 ret void 434} 435 436define void @memcpy_p0_p3_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 437; CHECK-LABEL: memcpy_p0_p3_sz16_align_1_1: 438; CHECK: ; %bb.0: ; %entry 439; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 440; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 441; CHECK-NEXT: s_waitcnt lgkmcnt(0) 442; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 443; CHECK-NEXT: s_waitcnt lgkmcnt(0) 444; CHECK-NEXT: s_setpc_b64 s[30:31] 445entry: 446 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false) 447 ret void 448} 449 450define void @memcpy_p0_p3_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 451; CHECK-LABEL: memcpy_p0_p3_sz31_align_1_1: 452; CHECK: ; %bb.0: ; %entry 453; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 454; CHECK-NEXT: ds_read_u8 v9, v2 offset:30 455; CHECK-NEXT: ds_read_b32 v8, v2 offset:24 456; CHECK-NEXT: ds_read_u16 v10, v2 offset:28 457; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16 458; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 459; CHECK-NEXT: s_waitcnt lgkmcnt(4) 460; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 461; CHECK-NEXT: s_waitcnt lgkmcnt(3) 462; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 463; CHECK-NEXT: s_waitcnt 
lgkmcnt(3) 464; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 465; CHECK-NEXT: s_waitcnt lgkmcnt(3) 466; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 467; CHECK-NEXT: s_waitcnt lgkmcnt(0) 468; CHECK-NEXT: s_setpc_b64 s[30:31] 469entry: 470 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false) 471 ret void 472} 473 474define void @memcpy_p0_p3_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 475; CHECK-LABEL: memcpy_p0_p3_sz32_align_1_1: 476; CHECK: ; %bb.0: ; %entry 477; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 478; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3 479; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 480; CHECK-NEXT: s_waitcnt lgkmcnt(1) 481; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 482; CHECK-NEXT: s_waitcnt lgkmcnt(1) 483; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] 484; CHECK-NEXT: s_waitcnt lgkmcnt(0) 485; CHECK-NEXT: s_setpc_b64 s[30:31] 486entry: 487 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false) 488 ret void 489} 490 491define void @memcpy_p0_p3_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { 492; CHECK-LABEL: memcpy_p0_p3_sz16_align_2_2: 493; CHECK: ; %bb.0: ; %entry 494; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 495; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 496; CHECK-NEXT: s_waitcnt lgkmcnt(0) 497; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 498; CHECK-NEXT: s_waitcnt lgkmcnt(0) 499; CHECK-NEXT: s_setpc_b64 s[30:31] 500entry: 501 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false) 502 ret void 503} 504 505define void @memcpy_p0_p3_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly 
%src) { 506; CHECK-LABEL: memcpy_p0_p3_sz31_align_2_2: 507; CHECK: ; %bb.0: ; %entry 508; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 509; CHECK-NEXT: ds_read_u8 v9, v2 offset:30 510; CHECK-NEXT: ds_read_b32 v8, v2 offset:24 511; CHECK-NEXT: ds_read_u16 v10, v2 offset:28 512; CHECK-NEXT: ds_read_b64 v[6:7], v2 offset:16 513; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 514; CHECK-NEXT: s_waitcnt lgkmcnt(4) 515; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:30 516; CHECK-NEXT: s_waitcnt lgkmcnt(3) 517; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 518; CHECK-NEXT: s_waitcnt lgkmcnt(3) 519; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[6:8] offset:16 520; CHECK-NEXT: s_waitcnt lgkmcnt(3) 521; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 522; CHECK-NEXT: s_waitcnt lgkmcnt(0) 523; CHECK-NEXT: s_setpc_b64 s[30:31] 524entry: 525 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false) 526 ret void 527} 528 529define void @memcpy_p0_p3_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { 530; CHECK-LABEL: memcpy_p0_p3_sz32_align_2_2: 531; CHECK: ; %bb.0: ; %entry 532; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 533; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3 534; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 535; CHECK-NEXT: s_waitcnt lgkmcnt(1) 536; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 537; CHECK-NEXT: s_waitcnt lgkmcnt(1) 538; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] 539; CHECK-NEXT: s_waitcnt lgkmcnt(0) 540; CHECK-NEXT: s_setpc_b64 s[30:31] 541entry: 542 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false) 543 ret void 544} 545 546define void @memcpy_p0_p3_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 547; CHECK-LABEL: memcpy_p0_p3_sz16_align_8_8: 
548; CHECK: ; %bb.0: ; %entry 549; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 550; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 551; CHECK-NEXT: s_waitcnt lgkmcnt(0) 552; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 553; CHECK-NEXT: s_waitcnt lgkmcnt(0) 554; CHECK-NEXT: s_setpc_b64 s[30:31] 555entry: 556 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false) 557 ret void 558} 559 560define void @memcpy_p0_p3_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 561; CHECK-LABEL: memcpy_p0_p3_sz31_align_8_8: 562; CHECK: ; %bb.0: ; %entry 563; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 564; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:15 565; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 566; CHECK-NEXT: s_waitcnt lgkmcnt(1) 567; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:15 568; CHECK-NEXT: s_waitcnt lgkmcnt(1) 569; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] 570; CHECK-NEXT: s_waitcnt lgkmcnt(0) 571; CHECK-NEXT: s_setpc_b64 s[30:31] 572entry: 573 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false) 574 ret void 575} 576 577define void @memcpy_p0_p3_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 578; CHECK-LABEL: memcpy_p0_p3_sz32_align_8_8: 579; CHECK: ; %bb.0: ; %entry 580; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 581; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset0:2 offset1:3 582; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset1:1 583; CHECK-NEXT: s_waitcnt lgkmcnt(1) 584; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 585; CHECK-NEXT: s_waitcnt lgkmcnt(1) 586; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] 587; CHECK-NEXT: s_waitcnt lgkmcnt(0) 588; CHECK-NEXT: s_setpc_b64 s[30:31] 589entry: 590 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef 
nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false) 591 ret void 592} 593 594define void @memcpy_p0_p3_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { 595; CHECK-LABEL: memcpy_p0_p3_sz16_align_16_16: 596; CHECK: ; %bb.0: ; %entry 597; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 598; CHECK-NEXT: ds_read_b128 v[2:5], v2 599; CHECK-NEXT: s_waitcnt lgkmcnt(0) 600; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 601; CHECK-NEXT: s_waitcnt lgkmcnt(0) 602; CHECK-NEXT: s_setpc_b64 s[30:31] 603entry: 604 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false) 605 ret void 606} 607 608define void @memcpy_p0_p3_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { 609; CHECK-LABEL: memcpy_p0_p3_sz31_align_16_16: 610; CHECK: ; %bb.0: ; %entry 611; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 612; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:15 613; CHECK-NEXT: ds_read_b128 v[7:10], v2 614; CHECK-NEXT: s_waitcnt lgkmcnt(1) 615; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:15 616; CHECK-NEXT: s_waitcnt lgkmcnt(1) 617; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] 618; CHECK-NEXT: s_waitcnt lgkmcnt(0) 619; CHECK-NEXT: s_setpc_b64 s[30:31] 620entry: 621 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false) 622 ret void 623} 624 625define void @memcpy_p0_p3_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { 626; CHECK-LABEL: memcpy_p0_p3_sz32_align_16_16: 627; CHECK: ; %bb.0: ; %entry 628; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 629; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:16 630; CHECK-NEXT: ds_read_b128 v[7:10], v2 631; CHECK-NEXT: s_waitcnt lgkmcnt(1) 632; CHECK-NEXT: flat_store_dwordx4 v[0:1], 
v[3:6] offset:16 633; CHECK-NEXT: s_waitcnt lgkmcnt(1) 634; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] 635; CHECK-NEXT: s_waitcnt lgkmcnt(0) 636; CHECK-NEXT: s_setpc_b64 s[30:31] 637entry: 638 tail call void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false) 639 ret void 640} 641 642define void @memcpy_p0_p4_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 643; CHECK-LABEL: memcpy_p0_p4_sz16_align_1_1: 644; CHECK: ; %bb.0: ; %entry 645; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 646; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off 647; CHECK-NEXT: s_waitcnt vmcnt(0) 648; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] 649; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8 650; CHECK-NEXT: s_waitcnt vmcnt(0) 651; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8 652; CHECK-NEXT: s_waitcnt lgkmcnt(0) 653; CHECK-NEXT: s_setpc_b64 s[30:31] 654entry: 655 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) 656 ret void 657} 658 659define void @memcpy_p0_p4_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 660; CHECK-LABEL: memcpy_p0_p4_sz31_align_1_1: 661; CHECK: ; %bb.0: ; %entry 662; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 663; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off 664; CHECK-NEXT: s_waitcnt vmcnt(0) 665; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] 666; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 667; CHECK-NEXT: s_waitcnt vmcnt(0) 668; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8 669; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 670; CHECK-NEXT: s_waitcnt vmcnt(0) 671; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 672; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24 673; CHECK-NEXT: s_waitcnt 
vmcnt(0) 674; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24 675; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28 676; CHECK-NEXT: s_waitcnt vmcnt(0) 677; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28 678; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30 679; CHECK-NEXT: s_waitcnt vmcnt(0) 680; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30 681; CHECK-NEXT: s_waitcnt lgkmcnt(0) 682; CHECK-NEXT: s_setpc_b64 s[30:31] 683entry: 684 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false) 685 ret void 686} 687 688define void @memcpy_p0_p4_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 689; CHECK-LABEL: memcpy_p0_p4_sz32_align_1_1: 690; CHECK: ; %bb.0: ; %entry 691; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 692; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off 693; CHECK-NEXT: s_waitcnt vmcnt(0) 694; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] 695; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 696; CHECK-NEXT: s_waitcnt vmcnt(0) 697; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8 698; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 699; CHECK-NEXT: s_waitcnt vmcnt(0) 700; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 701; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24 702; CHECK-NEXT: s_waitcnt vmcnt(0) 703; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24 704; CHECK-NEXT: s_waitcnt lgkmcnt(0) 705; CHECK-NEXT: s_setpc_b64 s[30:31] 706entry: 707 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false) 708 ret void 709} 710 711define void @memcpy_p0_p4_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 712; CHECK-LABEL: memcpy_p0_p4_sz16_align_2_2: 713; CHECK: ; %bb.0: ; %entry 714; CHECK-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 715; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off 716; CHECK-NEXT: s_waitcnt vmcnt(0) 717; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] 718; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:8 719; CHECK-NEXT: s_waitcnt vmcnt(0) 720; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:8 721; CHECK-NEXT: s_waitcnt lgkmcnt(0) 722; CHECK-NEXT: s_setpc_b64 s[30:31] 723entry: 724 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false) 725 ret void 726} 727 728define void @memcpy_p0_p4_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 729; CHECK-LABEL: memcpy_p0_p4_sz31_align_2_2: 730; CHECK: ; %bb.0: ; %entry 731; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 732; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off 733; CHECK-NEXT: s_waitcnt vmcnt(0) 734; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] 735; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 736; CHECK-NEXT: s_waitcnt vmcnt(0) 737; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8 738; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 739; CHECK-NEXT: s_waitcnt vmcnt(0) 740; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 741; CHECK-NEXT: global_load_dword v4, v[2:3], off offset:24 742; CHECK-NEXT: s_waitcnt vmcnt(0) 743; CHECK-NEXT: flat_store_dword v[0:1], v4 offset:24 744; CHECK-NEXT: global_load_ushort v4, v[2:3], off offset:28 745; CHECK-NEXT: s_waitcnt vmcnt(0) 746; CHECK-NEXT: flat_store_short v[0:1], v4 offset:28 747; CHECK-NEXT: global_load_ubyte v2, v[2:3], off offset:30 748; CHECK-NEXT: s_waitcnt vmcnt(0) 749; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:30 750; CHECK-NEXT: s_waitcnt lgkmcnt(0) 751; CHECK-NEXT: s_setpc_b64 s[30:31] 752entry: 753 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, 
i64 31, i1 false) 754 ret void 755} 756 757define void @memcpy_p0_p4_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 758; CHECK-LABEL: memcpy_p0_p4_sz32_align_2_2: 759; CHECK: ; %bb.0: ; %entry 760; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 761; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off 762; CHECK-NEXT: s_waitcnt vmcnt(0) 763; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] 764; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:8 765; CHECK-NEXT: s_waitcnt vmcnt(0) 766; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:8 767; CHECK-NEXT: global_load_dwordx2 v[4:5], v[2:3], off offset:16 768; CHECK-NEXT: s_waitcnt vmcnt(0) 769; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[4:5] offset:16 770; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:24 771; CHECK-NEXT: s_waitcnt vmcnt(0) 772; CHECK-NEXT: flat_store_dwordx2 v[0:1], v[2:3] offset:24 773; CHECK-NEXT: s_waitcnt lgkmcnt(0) 774; CHECK-NEXT: s_setpc_b64 s[30:31] 775entry: 776 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false) 777 ret void 778} 779 780define void @memcpy_p0_p4_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { 781; CHECK-LABEL: memcpy_p0_p4_sz16_align_8_8: 782; CHECK: ; %bb.0: ; %entry 783; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 784; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 785; CHECK-NEXT: s_waitcnt vmcnt(0) 786; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 787; CHECK-NEXT: s_waitcnt lgkmcnt(0) 788; CHECK-NEXT: s_setpc_b64 s[30:31] 789entry: 790 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false) 791 ret void 792} 793 794define void @memcpy_p0_p4_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { 795; CHECK-LABEL: 
memcpy_p0_p4_sz31_align_8_8: 796; CHECK: ; %bb.0: ; %entry 797; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 798; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 799; CHECK-NEXT: s_waitcnt vmcnt(0) 800; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 801; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15 802; CHECK-NEXT: s_waitcnt vmcnt(0) 803; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:15 804; CHECK-NEXT: s_waitcnt lgkmcnt(0) 805; CHECK-NEXT: s_setpc_b64 s[30:31] 806entry: 807 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false) 808 ret void 809} 810 811define void @memcpy_p0_p4_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { 812; CHECK-LABEL: memcpy_p0_p4_sz32_align_8_8: 813; CHECK: ; %bb.0: ; %entry 814; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 815; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 816; CHECK-NEXT: s_waitcnt vmcnt(0) 817; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 818; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16 819; CHECK-NEXT: s_waitcnt vmcnt(0) 820; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16 821; CHECK-NEXT: s_waitcnt lgkmcnt(0) 822; CHECK-NEXT: s_setpc_b64 s[30:31] 823entry: 824 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false) 825 ret void 826} 827 828define void @memcpy_p0_p4_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 829; CHECK-LABEL: memcpy_p0_p4_sz16_align_16_16: 830; CHECK: ; %bb.0: ; %entry 831; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 832; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 833; CHECK-NEXT: s_waitcnt vmcnt(0) 834; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 835; CHECK-NEXT: s_waitcnt lgkmcnt(0) 836; CHECK-NEXT: s_setpc_b64 s[30:31] 837entry: 838 
tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false) 839 ret void 840} 841 842define void @memcpy_p0_p4_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 843; CHECK-LABEL: memcpy_p0_p4_sz31_align_16_16: 844; CHECK: ; %bb.0: ; %entry 845; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 846; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 847; CHECK-NEXT: s_waitcnt vmcnt(0) 848; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 849; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15 850; CHECK-NEXT: s_waitcnt vmcnt(0) 851; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:15 852; CHECK-NEXT: s_waitcnt lgkmcnt(0) 853; CHECK-NEXT: s_setpc_b64 s[30:31] 854entry: 855 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false) 856 ret void 857} 858 859define void @memcpy_p0_p4_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 860; CHECK-LABEL: memcpy_p0_p4_sz32_align_16_16: 861; CHECK: ; %bb.0: ; %entry 862; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 863; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 864; CHECK-NEXT: s_waitcnt vmcnt(0) 865; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 866; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16 867; CHECK-NEXT: s_waitcnt vmcnt(0) 868; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[2:5] offset:16 869; CHECK-NEXT: s_waitcnt lgkmcnt(0) 870; CHECK-NEXT: s_setpc_b64 s[30:31] 871entry: 872 tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false) 873 ret void 874} 875 876define void @memcpy_p0_p5_sz16_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 877; CHECK-LABEL: memcpy_p0_p5_sz16_align_1_1: 
878; CHECK: ; %bb.0: ; %entry 879; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 880; CHECK-NEXT: s_clause 0x3 881; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 882; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 883; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 884; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 885; CHECK-NEXT: s_waitcnt vmcnt(0) 886; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 887; CHECK-NEXT: s_waitcnt lgkmcnt(0) 888; CHECK-NEXT: s_setpc_b64 s[30:31] 889entry: 890 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false) 891 ret void 892} 893 894define void @memcpy_p0_p5_sz31_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 895; CHECK-LABEL: memcpy_p0_p5_sz31_align_1_1: 896; CHECK: ; %bb.0: ; %entry 897; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 898; CHECK-NEXT: s_clause 0x8 899; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 900; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 901; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 902; CHECK-NEXT: buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28 903; CHECK-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30 904; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 905; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 906; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 907; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 908; CHECK-NEXT: s_waitcnt vmcnt(5) 909; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 910; CHECK-NEXT: s_waitcnt vmcnt(4) 911; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:30 912; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16 913; CHECK-NEXT: s_waitcnt vmcnt(0) 914; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 915; CHECK-NEXT: s_waitcnt lgkmcnt(0) 916; 
CHECK-NEXT: s_setpc_b64 s[30:31] 917entry: 918 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false) 919 ret void 920} 921 922define void @memcpy_p0_p5_sz32_align_1_1(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 923; CHECK-LABEL: memcpy_p0_p5_sz32_align_1_1: 924; CHECK: ; %bb.0: ; %entry 925; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 926; CHECK-NEXT: s_clause 0x7 927; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16 928; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20 929; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24 930; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28 931; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen 932; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4 933; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:8 934; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:12 935; CHECK-NEXT: s_waitcnt vmcnt(4) 936; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 937; CHECK-NEXT: s_waitcnt vmcnt(0) 938; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] 939; CHECK-NEXT: s_waitcnt lgkmcnt(0) 940; CHECK-NEXT: s_setpc_b64 s[30:31] 941entry: 942 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false) 943 ret void 944} 945 946define void @memcpy_p0_p5_sz16_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 947; CHECK-LABEL: memcpy_p0_p5_sz16_align_2_2: 948; CHECK: ; %bb.0: ; %entry 949; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 950; CHECK-NEXT: s_clause 0x3 951; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 952; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 953; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 954; CHECK-NEXT: buffer_load_dword 
v6, v2, s[0:3], 0 offen offset:12 955; CHECK-NEXT: s_waitcnt vmcnt(0) 956; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 957; CHECK-NEXT: s_waitcnt lgkmcnt(0) 958; CHECK-NEXT: s_setpc_b64 s[30:31] 959entry: 960 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false) 961 ret void 962} 963 964define void @memcpy_p0_p5_sz31_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 965; CHECK-LABEL: memcpy_p0_p5_sz31_align_2_2: 966; CHECK: ; %bb.0: ; %entry 967; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 968; CHECK-NEXT: s_clause 0x8 969; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 970; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 971; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 972; CHECK-NEXT: buffer_load_ushort v10, v2, s[0:3], 0 offen offset:28 973; CHECK-NEXT: buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:30 974; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 975; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 976; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 977; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 978; CHECK-NEXT: s_waitcnt vmcnt(5) 979; CHECK-NEXT: flat_store_short v[0:1], v10 offset:28 980; CHECK-NEXT: s_waitcnt vmcnt(4) 981; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:30 982; CHECK-NEXT: flat_store_dwordx3 v[0:1], v[7:9] offset:16 983; CHECK-NEXT: s_waitcnt vmcnt(0) 984; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 985; CHECK-NEXT: s_waitcnt lgkmcnt(0) 986; CHECK-NEXT: s_setpc_b64 s[30:31] 987entry: 988 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false) 989 ret void 990} 991 992define void @memcpy_p0_p5_sz32_align_2_2(ptr addrspace(0) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 993; 
CHECK-LABEL: memcpy_p0_p5_sz32_align_2_2: 994; CHECK: ; %bb.0: ; %entry 995; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 996; CHECK-NEXT: s_clause 0x7 997; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen offset:16 998; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:20 999; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:24 1000; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:28 1001; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen 1002; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:4 1003; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:8 1004; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:12 1005; CHECK-NEXT: s_waitcnt vmcnt(4) 1006; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] offset:16 1007; CHECK-NEXT: s_waitcnt vmcnt(0) 1008; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] 1009; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1010; CHECK-NEXT: s_setpc_b64 s[30:31] 1011entry: 1012 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false) 1013 ret void 1014} 1015 1016define void @memcpy_p0_p5_sz16_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { 1017; CHECK-LABEL: memcpy_p0_p5_sz16_align_8_8: 1018; CHECK: ; %bb.0: ; %entry 1019; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1020; CHECK-NEXT: s_clause 0x3 1021; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 1022; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 1023; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 1024; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 1025; CHECK-NEXT: s_waitcnt vmcnt(0) 1026; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 1027; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1028; CHECK-NEXT: s_setpc_b64 s[30:31] 1029entry: 1030 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr 
addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false) 1031 ret void 1032} 1033 1034define void @memcpy_p0_p5_sz31_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { 1035; CHECK-LABEL: memcpy_p0_p5_sz31_align_8_8: 1036; CHECK: ; %bb.0: ; %entry 1037; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1038; CHECK-NEXT: s_clause 0x7 1039; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 1040; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 1041; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 1042; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 1043; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15 1044; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19 1045; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 1046; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 1047; CHECK-NEXT: s_waitcnt vmcnt(4) 1048; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 1049; CHECK-NEXT: s_waitcnt vmcnt(0) 1050; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:15 1051; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1052; CHECK-NEXT: s_setpc_b64 s[30:31] 1053entry: 1054 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false) 1055 ret void 1056} 1057 1058define void @memcpy_p0_p5_sz32_align_8_8(ptr addrspace(0) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { 1059; CHECK-LABEL: memcpy_p0_p5_sz32_align_8_8: 1060; CHECK: ; %bb.0: ; %entry 1061; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1062; CHECK-NEXT: s_clause 0x7 1063; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 1064; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 1065; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 1066; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 1067; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen 
offset:16 1068; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 1069; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 1070; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 1071; CHECK-NEXT: s_waitcnt vmcnt(4) 1072; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 1073; CHECK-NEXT: s_waitcnt vmcnt(0) 1074; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:16 1075; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1076; CHECK-NEXT: s_setpc_b64 s[30:31] 1077entry: 1078 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false) 1079 ret void 1080} 1081 1082define void @memcpy_p0_p5_sz16_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { 1083; CHECK-LABEL: memcpy_p0_p5_sz16_align_16_16: 1084; CHECK: ; %bb.0: ; %entry 1085; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1086; CHECK-NEXT: s_clause 0x3 1087; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 1088; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 1089; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 1090; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 1091; CHECK-NEXT: s_waitcnt vmcnt(0) 1092; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 1093; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1094; CHECK-NEXT: s_setpc_b64 s[30:31] 1095entry: 1096 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false) 1097 ret void 1098} 1099 1100define void @memcpy_p0_p5_sz31_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { 1101; CHECK-LABEL: memcpy_p0_p5_sz31_align_16_16: 1102; CHECK: ; %bb.0: ; %entry 1103; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1104; CHECK-NEXT: s_clause 0x7 1105; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 1106; CHECK-NEXT: buffer_load_dword v4, v2, 
s[0:3], 0 offen offset:4 1107; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 1108; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 1109; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15 1110; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19 1111; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 1112; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 1113; CHECK-NEXT: s_waitcnt vmcnt(4) 1114; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 1115; CHECK-NEXT: s_waitcnt vmcnt(0) 1116; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:15 1117; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1118; CHECK-NEXT: s_setpc_b64 s[30:31] 1119entry: 1120 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false) 1121 ret void 1122} 1123 1124define void @memcpy_p0_p5_sz32_align_16_16(ptr addrspace(0) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { 1125; CHECK-LABEL: memcpy_p0_p5_sz32_align_16_16: 1126; CHECK: ; %bb.0: ; %entry 1127; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1128; CHECK-NEXT: s_clause 0x7 1129; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 1130; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 1131; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 1132; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 1133; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 1134; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 1135; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 1136; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 1137; CHECK-NEXT: s_waitcnt vmcnt(4) 1138; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 1139; CHECK-NEXT: s_waitcnt vmcnt(0) 1140; CHECK-NEXT: flat_store_dwordx4 v[0:1], v[7:10] offset:16 1141; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1142; CHECK-NEXT: 
s_setpc_b64 s[30:31] 1143entry: 1144 tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false) 1145 ret void 1146} 1147 1148define void @memcpy_p1_p0_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { 1149; CHECK-LABEL: memcpy_p1_p0_sz16_align_1_1: 1150; CHECK: ; %bb.0: ; %entry 1151; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1152; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] 1153; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1154; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1155; CHECK-NEXT: s_setpc_b64 s[30:31] 1156entry: 1157 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false) 1158 ret void 1159} 1160 1161define void @memcpy_p1_p0_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { 1162; CHECK-LABEL: memcpy_p1_p0_sz31_align_1_1: 1163; CHECK: ; %bb.0: ; %entry 1164; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1165; CHECK-NEXT: s_clause 0x2 1166; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23 1167; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16 1168; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] 1169; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) 1170; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23 1171; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 1172; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16 1173; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1174; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1175; CHECK-NEXT: s_setpc_b64 s[30:31] 1176entry: 1177 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false) 1178 ret void 1179} 1180 1181define void @memcpy_p1_p0_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(0) align 1 readonly 
%src) { 1182; CHECK-LABEL: memcpy_p1_p0_sz32_align_1_1: 1183; CHECK: ; %bb.0: ; %entry 1184; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1185; CHECK-NEXT: s_clause 0x1 1186; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 1187; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 1188; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 1189; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 1190; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1191; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1192; CHECK-NEXT: s_setpc_b64 s[30:31] 1193entry: 1194 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false) 1195 ret void 1196} 1197 1198define void @memcpy_p1_p0_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 1199; CHECK-LABEL: memcpy_p1_p0_sz16_align_2_2: 1200; CHECK: ; %bb.0: ; %entry 1201; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1202; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] 1203; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1204; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1205; CHECK-NEXT: s_setpc_b64 s[30:31] 1206entry: 1207 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false) 1208 ret void 1209} 1210 1211define void @memcpy_p1_p0_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 1212; CHECK-LABEL: memcpy_p1_p0_sz31_align_2_2: 1213; CHECK: ; %bb.0: ; %entry 1214; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1215; CHECK-NEXT: s_clause 0x2 1216; CHECK-NEXT: flat_load_dwordx2 v[6:7], v[2:3] offset:23 1217; CHECK-NEXT: flat_load_dwordx2 v[8:9], v[2:3] offset:16 1218; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] 1219; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) 1220; CHECK-NEXT: global_store_dwordx2 v[0:1], v[6:7], off offset:23 1221; CHECK-NEXT: s_waitcnt vmcnt(1) 
lgkmcnt(1) 1222; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:16 1223; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1224; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1225; CHECK-NEXT: s_setpc_b64 s[30:31] 1226entry: 1227 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false) 1228 ret void 1229} 1230 1231define void @memcpy_p1_p0_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 1232; CHECK-LABEL: memcpy_p1_p0_sz32_align_2_2: 1233; CHECK: ; %bb.0: ; %entry 1234; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1235; CHECK-NEXT: s_clause 0x1 1236; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 1237; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 1238; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 1239; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 1240; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1241; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1242; CHECK-NEXT: s_setpc_b64 s[30:31] 1243entry: 1244 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false) 1245 ret void 1246} 1247 1248define void @memcpy_p1_p0_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { 1249; CHECK-LABEL: memcpy_p1_p0_sz16_align_8_8: 1250; CHECK: ; %bb.0: ; %entry 1251; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1252; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] 1253; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1254; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1255; CHECK-NEXT: s_setpc_b64 s[30:31] 1256entry: 1257 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false) 1258 ret void 1259} 1260 1261define void @memcpy_p1_p0_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr 
addrspace(0) align 8 readonly %src) { 1262; CHECK-LABEL: memcpy_p1_p0_sz31_align_8_8: 1263; CHECK: ; %bb.0: ; %entry 1264; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1265; CHECK-NEXT: s_clause 0x1 1266; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15 1267; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 1268; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 1269; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15 1270; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1271; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1272; CHECK-NEXT: s_setpc_b64 s[30:31] 1273entry: 1274 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false) 1275 ret void 1276} 1277 1278define void @memcpy_p1_p0_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { 1279; CHECK-LABEL: memcpy_p1_p0_sz32_align_8_8: 1280; CHECK: ; %bb.0: ; %entry 1281; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1282; CHECK-NEXT: s_clause 0x1 1283; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 1284; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 1285; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 1286; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 1287; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1288; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1289; CHECK-NEXT: s_setpc_b64 s[30:31] 1290entry: 1291 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false) 1292 ret void 1293} 1294 1295define void @memcpy_p1_p0_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 1296; CHECK-LABEL: memcpy_p1_p0_sz16_align_16_16: 1297; CHECK: ; %bb.0: ; %entry 1298; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1299; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[2:3] 1300; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1301; 
CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1302; CHECK-NEXT: s_setpc_b64 s[30:31] 1303entry: 1304 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false) 1305 ret void 1306} 1307 1308define void @memcpy_p1_p0_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 1309; CHECK-LABEL: memcpy_p1_p0_sz31_align_16_16: 1310; CHECK: ; %bb.0: ; %entry 1311; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1312; CHECK-NEXT: s_clause 0x1 1313; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:15 1314; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 1315; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 1316; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15 1317; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1318; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1319; CHECK-NEXT: s_setpc_b64 s[30:31] 1320entry: 1321 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false) 1322 ret void 1323} 1324 1325define void @memcpy_p1_p0_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 1326; CHECK-LABEL: memcpy_p1_p0_sz32_align_16_16: 1327; CHECK: ; %bb.0: ; %entry 1328; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1329; CHECK-NEXT: s_clause 0x1 1330; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[2:3] offset:16 1331; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[2:3] 1332; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 1333; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 1334; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1335; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1336; CHECK-NEXT: s_setpc_b64 s[30:31] 1337entry: 1338 tail call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false) 1339 ret void 
1340} 1341 1342define void @memcpy_p1_p1_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 1343; CHECK-LABEL: memcpy_p1_p1_sz16_align_1_1: 1344; CHECK: ; %bb.0: ; %entry 1345; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1346; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 1347; CHECK-NEXT: s_waitcnt vmcnt(0) 1348; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1349; CHECK-NEXT: s_setpc_b64 s[30:31] 1350entry: 1351 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false) 1352 ret void 1353} 1354 1355define void @memcpy_p1_p1_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 1356; CHECK-LABEL: memcpy_p1_p1_sz31_align_1_1: 1357; CHECK: ; %bb.0: ; %entry 1358; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1359; CHECK-NEXT: s_clause 0x2 1360; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23 1361; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 1362; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16 1363; CHECK-NEXT: s_waitcnt vmcnt(2) 1364; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23 1365; CHECK-NEXT: s_waitcnt vmcnt(1) 1366; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1367; CHECK-NEXT: s_waitcnt vmcnt(0) 1368; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16 1369; CHECK-NEXT: s_setpc_b64 s[30:31] 1370entry: 1371 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false) 1372 ret void 1373} 1374 1375define void @memcpy_p1_p1_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 1376; CHECK-LABEL: memcpy_p1_p1_sz32_align_1_1: 1377; CHECK: ; %bb.0: ; %entry 1378; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1379; CHECK-NEXT: s_clause 0x1 1380; CHECK-NEXT: global_load_dwordx4 v[4:7], 
v[2:3], off offset:16 1381; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 1382; CHECK-NEXT: s_waitcnt vmcnt(1) 1383; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 1384; CHECK-NEXT: s_waitcnt vmcnt(0) 1385; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1386; CHECK-NEXT: s_setpc_b64 s[30:31] 1387entry: 1388 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false) 1389 ret void 1390} 1391 1392define void @memcpy_p1_p1_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 1393; CHECK-LABEL: memcpy_p1_p1_sz16_align_2_2: 1394; CHECK: ; %bb.0: ; %entry 1395; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1396; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 1397; CHECK-NEXT: s_waitcnt vmcnt(0) 1398; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1399; CHECK-NEXT: s_setpc_b64 s[30:31] 1400entry: 1401 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false) 1402 ret void 1403} 1404 1405define void @memcpy_p1_p1_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 1406; CHECK-LABEL: memcpy_p1_p1_sz31_align_2_2: 1407; CHECK: ; %bb.0: ; %entry 1408; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1409; CHECK-NEXT: s_clause 0x2 1410; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off offset:23 1411; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 1412; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:16 1413; CHECK-NEXT: s_waitcnt vmcnt(2) 1414; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off offset:23 1415; CHECK-NEXT: s_waitcnt vmcnt(1) 1416; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1417; CHECK-NEXT: s_waitcnt vmcnt(0) 1418; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:16 1419; CHECK-NEXT: s_setpc_b64 s[30:31] 1420entry: 1421 
tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false) 1422 ret void 1423} 1424 1425define void @memcpy_p1_p1_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 1426; CHECK-LABEL: memcpy_p1_p1_sz32_align_2_2: 1427; CHECK: ; %bb.0: ; %entry 1428; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1429; CHECK-NEXT: s_clause 0x1 1430; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 1431; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 1432; CHECK-NEXT: s_waitcnt vmcnt(1) 1433; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 1434; CHECK-NEXT: s_waitcnt vmcnt(0) 1435; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1436; CHECK-NEXT: s_setpc_b64 s[30:31] 1437entry: 1438 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false) 1439 ret void 1440} 1441 1442define void @memcpy_p1_p1_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 1443; CHECK-LABEL: memcpy_p1_p1_sz16_align_8_8: 1444; CHECK: ; %bb.0: ; %entry 1445; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1446; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 1447; CHECK-NEXT: s_waitcnt vmcnt(0) 1448; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1449; CHECK-NEXT: s_setpc_b64 s[30:31] 1450entry: 1451 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false) 1452 ret void 1453} 1454 1455define void @memcpy_p1_p1_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 1456; CHECK-LABEL: memcpy_p1_p1_sz31_align_8_8: 1457; CHECK: ; %bb.0: ; %entry 1458; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1459; CHECK-NEXT: s_clause 0x1 1460; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], 
off offset:15 1461; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 1462; CHECK-NEXT: s_waitcnt vmcnt(1) 1463; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15 1464; CHECK-NEXT: s_waitcnt vmcnt(0) 1465; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1466; CHECK-NEXT: s_setpc_b64 s[30:31] 1467entry: 1468 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false) 1469 ret void 1470} 1471 1472define void @memcpy_p1_p1_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 1473; CHECK-LABEL: memcpy_p1_p1_sz32_align_8_8: 1474; CHECK: ; %bb.0: ; %entry 1475; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1476; CHECK-NEXT: s_clause 0x1 1477; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 1478; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 1479; CHECK-NEXT: s_waitcnt vmcnt(1) 1480; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 1481; CHECK-NEXT: s_waitcnt vmcnt(0) 1482; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1483; CHECK-NEXT: s_setpc_b64 s[30:31] 1484entry: 1485 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false) 1486 ret void 1487} 1488 1489define void @memcpy_p1_p1_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 1490; CHECK-LABEL: memcpy_p1_p1_sz16_align_16_16: 1491; CHECK: ; %bb.0: ; %entry 1492; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1493; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 1494; CHECK-NEXT: s_waitcnt vmcnt(0) 1495; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1496; CHECK-NEXT: s_setpc_b64 s[30:31] 1497entry: 1498 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false) 1499 ret void 1500} 1501 
1502define void @memcpy_p1_p1_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 1503; CHECK-LABEL: memcpy_p1_p1_sz31_align_16_16: 1504; CHECK: ; %bb.0: ; %entry 1505; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1506; CHECK-NEXT: s_clause 0x1 1507; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:15 1508; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 1509; CHECK-NEXT: s_waitcnt vmcnt(1) 1510; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:15 1511; CHECK-NEXT: s_waitcnt vmcnt(0) 1512; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1513; CHECK-NEXT: s_setpc_b64 s[30:31] 1514entry: 1515 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false) 1516 ret void 1517} 1518 1519define void @memcpy_p1_p1_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 1520; CHECK-LABEL: memcpy_p1_p1_sz32_align_16_16: 1521; CHECK: ; %bb.0: ; %entry 1522; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1523; CHECK-NEXT: s_clause 0x1 1524; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:16 1525; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off 1526; CHECK-NEXT: s_waitcnt vmcnt(1) 1527; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:16 1528; CHECK-NEXT: s_waitcnt vmcnt(0) 1529; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off 1530; CHECK-NEXT: s_setpc_b64 s[30:31] 1531entry: 1532 tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false) 1533 ret void 1534} 1535 1536define void @memcpy_p1_p3_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 1537; CHECK-LABEL: memcpy_p1_p3_sz16_align_1_1: 1538; CHECK: ; %bb.0: ; %entry 1539; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1540; CHECK-NEXT: ds_read_b128 
v[2:5], v2 1541; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1542; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1543; CHECK-NEXT: s_setpc_b64 s[30:31] 1544entry: 1545 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false) 1546 ret void 1547} 1548 1549define void @memcpy_p1_p3_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 1550; CHECK-LABEL: memcpy_p1_p3_sz31_align_1_1: 1551; CHECK: ; %bb.0: ; %entry 1552; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1553; CHECK-NEXT: ds_read_b64 v[7:8], v2 1554; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:8 1555; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:23 1556; CHECK-NEXT: s_waitcnt lgkmcnt(2) 1557; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off 1558; CHECK-NEXT: s_waitcnt lgkmcnt(1) 1559; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8 1560; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1561; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23 1562; CHECK-NEXT: s_setpc_b64 s[30:31] 1563entry: 1564 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false) 1565 ret void 1566} 1567 1568define void @memcpy_p1_p3_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 1569; CHECK-LABEL: memcpy_p1_p3_sz32_align_1_1: 1570; CHECK: ; %bb.0: ; %entry 1571; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1572; CHECK-NEXT: ds_read_b128 v[3:6], v2 1573; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16 1574; CHECK-NEXT: s_waitcnt lgkmcnt(1) 1575; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1576; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1577; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 1578; CHECK-NEXT: s_setpc_b64 s[30:31] 1579entry: 1580 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(3) 
noundef nonnull align 1 %src, i64 32, i1 false) 1581 ret void 1582} 1583 1584define void @memcpy_p1_p3_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { 1585; CHECK-LABEL: memcpy_p1_p3_sz16_align_2_2: 1586; CHECK: ; %bb.0: ; %entry 1587; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1588; CHECK-NEXT: ds_read_b128 v[2:5], v2 1589; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1590; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1591; CHECK-NEXT: s_setpc_b64 s[30:31] 1592entry: 1593 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false) 1594 ret void 1595} 1596 1597define void @memcpy_p1_p3_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { 1598; CHECK-LABEL: memcpy_p1_p3_sz31_align_2_2: 1599; CHECK: ; %bb.0: ; %entry 1600; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1601; CHECK-NEXT: ds_read_b64 v[7:8], v2 1602; CHECK-NEXT: ds_read_b128 v[3:6], v2 offset:8 1603; CHECK-NEXT: ds_read_b64 v[9:10], v2 offset:23 1604; CHECK-NEXT: s_waitcnt lgkmcnt(2) 1605; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off 1606; CHECK-NEXT: s_waitcnt lgkmcnt(1) 1607; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off offset:8 1608; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1609; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23 1610; CHECK-NEXT: s_setpc_b64 s[30:31] 1611entry: 1612 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false) 1613 ret void 1614} 1615 1616define void @memcpy_p1_p3_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { 1617; CHECK-LABEL: memcpy_p1_p3_sz32_align_2_2: 1618; CHECK: ; %bb.0: ; %entry 1619; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1620; CHECK-NEXT: ds_read_b128 v[3:6], v2 1621; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16 1622; 
CHECK-NEXT: s_waitcnt lgkmcnt(1) 1623; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1624; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1625; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 1626; CHECK-NEXT: s_setpc_b64 s[30:31] 1627entry: 1628 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false) 1629 ret void 1630} 1631 1632define void @memcpy_p1_p3_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 1633; CHECK-LABEL: memcpy_p1_p3_sz16_align_8_8: 1634; CHECK: ; %bb.0: ; %entry 1635; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1636; CHECK-NEXT: ds_read2_b64 v[2:5], v2 offset1:1 1637; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1638; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1639; CHECK-NEXT: s_setpc_b64 s[30:31] 1640entry: 1641 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false) 1642 ret void 1643} 1644 1645define void @memcpy_p1_p3_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 1646; CHECK-LABEL: memcpy_p1_p3_sz31_align_8_8: 1647; CHECK: ; %bb.0: ; %entry 1648; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1649; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset1:1 1650; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:15 1651; CHECK-NEXT: s_waitcnt lgkmcnt(1) 1652; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1653; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1654; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15 1655; CHECK-NEXT: s_setpc_b64 s[30:31] 1656entry: 1657 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false) 1658 ret void 1659} 1660 1661define void @memcpy_p1_p3_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 1662; CHECK-LABEL: 
memcpy_p1_p3_sz32_align_8_8: 1663; CHECK: ; %bb.0: ; %entry 1664; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1665; CHECK-NEXT: ds_read2_b64 v[3:6], v2 offset1:1 1666; CHECK-NEXT: ds_read2_b64 v[7:10], v2 offset0:2 offset1:3 1667; CHECK-NEXT: s_waitcnt lgkmcnt(1) 1668; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1669; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1670; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 1671; CHECK-NEXT: s_setpc_b64 s[30:31] 1672entry: 1673 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false) 1674 ret void 1675} 1676 1677define void @memcpy_p1_p3_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { 1678; CHECK-LABEL: memcpy_p1_p3_sz16_align_16_16: 1679; CHECK: ; %bb.0: ; %entry 1680; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1681; CHECK-NEXT: ds_read_b128 v[2:5], v2 1682; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1683; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1684; CHECK-NEXT: s_setpc_b64 s[30:31] 1685entry: 1686 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false) 1687 ret void 1688} 1689 1690define void @memcpy_p1_p3_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { 1691; CHECK-LABEL: memcpy_p1_p3_sz31_align_16_16: 1692; CHECK: ; %bb.0: ; %entry 1693; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1694; CHECK-NEXT: ds_read_b128 v[3:6], v2 1695; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:15 1696; CHECK-NEXT: s_waitcnt lgkmcnt(1) 1697; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1698; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1699; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15 1700; CHECK-NEXT: s_setpc_b64 s[30:31] 1701entry: 1702 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 
16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false) 1703 ret void 1704} 1705 1706define void @memcpy_p1_p3_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { 1707; CHECK-LABEL: memcpy_p1_p3_sz32_align_16_16: 1708; CHECK: ; %bb.0: ; %entry 1709; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1710; CHECK-NEXT: ds_read_b128 v[3:6], v2 1711; CHECK-NEXT: ds_read_b128 v[7:10], v2 offset:16 1712; CHECK-NEXT: s_waitcnt lgkmcnt(1) 1713; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1714; CHECK-NEXT: s_waitcnt lgkmcnt(0) 1715; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 1716; CHECK-NEXT: s_setpc_b64 s[30:31] 1717entry: 1718 tail call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false) 1719 ret void 1720} 1721 1722define void @memcpy_p1_p4_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 1723; CHECK-LABEL: memcpy_p1_p4_sz16_align_1_1: 1724; CHECK: ; %bb.0: ; %entry 1725; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1726; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 1727; CHECK-NEXT: s_waitcnt vmcnt(0) 1728; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1729; CHECK-NEXT: s_setpc_b64 s[30:31] 1730entry: 1731 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) 1732 ret void 1733} 1734 1735define void @memcpy_p1_p4_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 1736; CHECK-LABEL: memcpy_p1_p4_sz31_align_1_1: 1737; CHECK: ; %bb.0: ; %entry 1738; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1739; CHECK-NEXT: s_clause 0x1 1740; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off 1741; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8 1742; CHECK-NEXT: s_waitcnt vmcnt(1) 1743; CHECK-NEXT: 
global_store_dwordx2 v[0:1], v[8:9], off 1744; CHECK-NEXT: s_waitcnt vmcnt(0) 1745; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:8 1746; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23 1747; CHECK-NEXT: s_waitcnt vmcnt(0) 1748; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:23 1749; CHECK-NEXT: s_setpc_b64 s[30:31] 1750entry: 1751 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false) 1752 ret void 1753} 1754 1755define void @memcpy_p1_p4_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 1756; CHECK-LABEL: memcpy_p1_p4_sz32_align_1_1: 1757; CHECK: ; %bb.0: ; %entry 1758; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1759; CHECK-NEXT: s_clause 0x1 1760; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 1761; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16 1762; CHECK-NEXT: s_waitcnt vmcnt(1) 1763; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1764; CHECK-NEXT: s_waitcnt vmcnt(0) 1765; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:16 1766; CHECK-NEXT: s_setpc_b64 s[30:31] 1767entry: 1768 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false) 1769 ret void 1770} 1771 1772define void @memcpy_p1_p4_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 1773; CHECK-LABEL: memcpy_p1_p4_sz16_align_2_2: 1774; CHECK: ; %bb.0: ; %entry 1775; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1776; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 1777; CHECK-NEXT: s_waitcnt vmcnt(0) 1778; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1779; CHECK-NEXT: s_setpc_b64 s[30:31] 1780entry: 1781 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, 
i1 false) 1782 ret void 1783} 1784 1785define void @memcpy_p1_p4_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 1786; CHECK-LABEL: memcpy_p1_p4_sz31_align_2_2: 1787; CHECK: ; %bb.0: ; %entry 1788; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1789; CHECK-NEXT: s_clause 0x1 1790; CHECK-NEXT: global_load_dwordx2 v[8:9], v[2:3], off 1791; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off offset:8 1792; CHECK-NEXT: s_waitcnt vmcnt(1) 1793; CHECK-NEXT: global_store_dwordx2 v[0:1], v[8:9], off 1794; CHECK-NEXT: s_waitcnt vmcnt(0) 1795; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off offset:8 1796; CHECK-NEXT: global_load_dwordx2 v[2:3], v[2:3], off offset:23 1797; CHECK-NEXT: s_waitcnt vmcnt(0) 1798; CHECK-NEXT: global_store_dwordx2 v[0:1], v[2:3], off offset:23 1799; CHECK-NEXT: s_setpc_b64 s[30:31] 1800entry: 1801 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false) 1802 ret void 1803} 1804 1805define void @memcpy_p1_p4_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 1806; CHECK-LABEL: memcpy_p1_p4_sz32_align_2_2: 1807; CHECK: ; %bb.0: ; %entry 1808; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1809; CHECK-NEXT: s_clause 0x1 1810; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 1811; CHECK-NEXT: global_load_dwordx4 v[8:11], v[2:3], off offset:16 1812; CHECK-NEXT: s_waitcnt vmcnt(1) 1813; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1814; CHECK-NEXT: s_waitcnt vmcnt(0) 1815; CHECK-NEXT: global_store_dwordx4 v[0:1], v[8:11], off offset:16 1816; CHECK-NEXT: s_setpc_b64 s[30:31] 1817entry: 1818 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false) 1819 ret void 1820} 1821 1822define void @memcpy_p1_p4_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 
readonly %src) { 1823; CHECK-LABEL: memcpy_p1_p4_sz16_align_8_8: 1824; CHECK: ; %bb.0: ; %entry 1825; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1826; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 1827; CHECK-NEXT: s_waitcnt vmcnt(0) 1828; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1829; CHECK-NEXT: s_setpc_b64 s[30:31] 1830entry: 1831 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false) 1832 ret void 1833} 1834 1835define void @memcpy_p1_p4_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { 1836; CHECK-LABEL: memcpy_p1_p4_sz31_align_8_8: 1837; CHECK: ; %bb.0: ; %entry 1838; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1839; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 1840; CHECK-NEXT: s_waitcnt vmcnt(0) 1841; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1842; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15 1843; CHECK-NEXT: s_waitcnt vmcnt(0) 1844; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:15 1845; CHECK-NEXT: s_setpc_b64 s[30:31] 1846entry: 1847 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false) 1848 ret void 1849} 1850 1851define void @memcpy_p1_p4_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { 1852; CHECK-LABEL: memcpy_p1_p4_sz32_align_8_8: 1853; CHECK: ; %bb.0: ; %entry 1854; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1855; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 1856; CHECK-NEXT: s_waitcnt vmcnt(0) 1857; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1858; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16 1859; CHECK-NEXT: s_waitcnt vmcnt(0) 1860; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16 1861; CHECK-NEXT: s_setpc_b64 s[30:31] 1862entry: 1863 tail call 
void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false) 1864 ret void 1865} 1866 1867define void @memcpy_p1_p4_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 1868; CHECK-LABEL: memcpy_p1_p4_sz16_align_16_16: 1869; CHECK: ; %bb.0: ; %entry 1870; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1871; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off 1872; CHECK-NEXT: s_waitcnt vmcnt(0) 1873; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1874; CHECK-NEXT: s_setpc_b64 s[30:31] 1875entry: 1876 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false) 1877 ret void 1878} 1879 1880define void @memcpy_p1_p4_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 1881; CHECK-LABEL: memcpy_p1_p4_sz31_align_16_16: 1882; CHECK: ; %bb.0: ; %entry 1883; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1884; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 1885; CHECK-NEXT: s_waitcnt vmcnt(0) 1886; CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1887; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:15 1888; CHECK-NEXT: s_waitcnt vmcnt(0) 1889; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:15 1890; CHECK-NEXT: s_setpc_b64 s[30:31] 1891entry: 1892 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false) 1893 ret void 1894} 1895 1896define void @memcpy_p1_p4_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 1897; CHECK-LABEL: memcpy_p1_p4_sz32_align_16_16: 1898; CHECK: ; %bb.0: ; %entry 1899; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1900; CHECK-NEXT: global_load_dwordx4 v[4:7], v[2:3], off 1901; CHECK-NEXT: s_waitcnt vmcnt(0) 1902; 
CHECK-NEXT: global_store_dwordx4 v[0:1], v[4:7], off 1903; CHECK-NEXT: global_load_dwordx4 v[2:5], v[2:3], off offset:16 1904; CHECK-NEXT: s_waitcnt vmcnt(0) 1905; CHECK-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:16 1906; CHECK-NEXT: s_setpc_b64 s[30:31] 1907entry: 1908 tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false) 1909 ret void 1910} 1911 1912define void @memcpy_p1_p5_sz16_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 1913; CHECK-LABEL: memcpy_p1_p5_sz16_align_1_1: 1914; CHECK: ; %bb.0: ; %entry 1915; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1916; CHECK-NEXT: s_clause 0x3 1917; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 1918; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 1919; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 1920; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 1921; CHECK-NEXT: s_waitcnt vmcnt(0) 1922; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1923; CHECK-NEXT: s_setpc_b64 s[30:31] 1924entry: 1925 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false) 1926 ret void 1927} 1928 1929define void @memcpy_p1_p5_sz31_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 1930; CHECK-LABEL: memcpy_p1_p5_sz31_align_1_1: 1931; CHECK: ; %bb.0: ; %entry 1932; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1933; CHECK-NEXT: s_clause 0x7 1934; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 1935; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 1936; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 1937; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 1938; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 1939; CHECK-NEXT: buffer_load_dword 
v8, v2, s[0:3], 0 offen offset:20 1940; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 1941; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 1942; CHECK-NEXT: s_waitcnt vmcnt(4) 1943; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1944; CHECK-NEXT: s_waitcnt vmcnt(2) 1945; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16 1946; CHECK-NEXT: s_waitcnt vmcnt(0) 1947; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off offset:23 1948; CHECK-NEXT: s_setpc_b64 s[30:31] 1949entry: 1950 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false) 1951 ret void 1952} 1953 1954define void @memcpy_p1_p5_sz32_align_1_1(ptr addrspace(1) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 1955; CHECK-LABEL: memcpy_p1_p5_sz32_align_1_1: 1956; CHECK: ; %bb.0: ; %entry 1957; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1958; CHECK-NEXT: s_clause 0x7 1959; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 1960; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 1961; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 1962; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 1963; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 1964; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 1965; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 1966; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 1967; CHECK-NEXT: s_waitcnt vmcnt(4) 1968; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1969; CHECK-NEXT: s_waitcnt vmcnt(0) 1970; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 1971; CHECK-NEXT: s_setpc_b64 s[30:31] 1972entry: 1973 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false) 1974 ret void 1975} 1976 1977define void 
@memcpy_p1_p5_sz16_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 1978; CHECK-LABEL: memcpy_p1_p5_sz16_align_2_2: 1979; CHECK: ; %bb.0: ; %entry 1980; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1981; CHECK-NEXT: s_clause 0x3 1982; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 1983; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 1984; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 1985; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 1986; CHECK-NEXT: s_waitcnt vmcnt(0) 1987; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 1988; CHECK-NEXT: s_setpc_b64 s[30:31] 1989entry: 1990 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false) 1991 ret void 1992} 1993 1994define void @memcpy_p1_p5_sz31_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 1995; CHECK-LABEL: memcpy_p1_p5_sz31_align_2_2: 1996; CHECK: ; %bb.0: ; %entry 1997; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1998; CHECK-NEXT: s_clause 0x7 1999; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 2000; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 2001; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 2002; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 2003; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 2004; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 2005; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 2006; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 2007; CHECK-NEXT: s_waitcnt vmcnt(4) 2008; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 2009; CHECK-NEXT: s_waitcnt vmcnt(2) 2010; CHECK-NEXT: global_store_dwordx2 v[0:1], v[7:8], off offset:16 2011; CHECK-NEXT: s_waitcnt vmcnt(0) 2012; CHECK-NEXT: global_store_dwordx2 v[0:1], v[9:10], off 
offset:23 2013; CHECK-NEXT: s_setpc_b64 s[30:31] 2014entry: 2015 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false) 2016 ret void 2017} 2018 2019define void @memcpy_p1_p5_sz32_align_2_2(ptr addrspace(1) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 2020; CHECK-LABEL: memcpy_p1_p5_sz32_align_2_2: 2021; CHECK: ; %bb.0: ; %entry 2022; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2023; CHECK-NEXT: s_clause 0x7 2024; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 2025; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 2026; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 2027; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 2028; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 2029; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 2030; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 2031; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 2032; CHECK-NEXT: s_waitcnt vmcnt(4) 2033; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 2034; CHECK-NEXT: s_waitcnt vmcnt(0) 2035; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 2036; CHECK-NEXT: s_setpc_b64 s[30:31] 2037entry: 2038 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false) 2039 ret void 2040} 2041 2042define void @memcpy_p1_p5_sz16_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { 2043; CHECK-LABEL: memcpy_p1_p5_sz16_align_8_8: 2044; CHECK: ; %bb.0: ; %entry 2045; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2046; CHECK-NEXT: s_clause 0x3 2047; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 2048; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 2049; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 2050; 
CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 2051; CHECK-NEXT: s_waitcnt vmcnt(0) 2052; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 2053; CHECK-NEXT: s_setpc_b64 s[30:31] 2054entry: 2055 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false) 2056 ret void 2057} 2058 2059define void @memcpy_p1_p5_sz31_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { 2060; CHECK-LABEL: memcpy_p1_p5_sz31_align_8_8: 2061; CHECK: ; %bb.0: ; %entry 2062; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2063; CHECK-NEXT: s_clause 0x7 2064; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 2065; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 2066; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 2067; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 2068; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15 2069; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19 2070; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 2071; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 2072; CHECK-NEXT: s_waitcnt vmcnt(4) 2073; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 2074; CHECK-NEXT: s_waitcnt vmcnt(0) 2075; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15 2076; CHECK-NEXT: s_setpc_b64 s[30:31] 2077entry: 2078 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false) 2079 ret void 2080} 2081 2082define void @memcpy_p1_p5_sz32_align_8_8(ptr addrspace(1) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { 2083; CHECK-LABEL: memcpy_p1_p5_sz32_align_8_8: 2084; CHECK: ; %bb.0: ; %entry 2085; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2086; CHECK-NEXT: s_clause 0x7 2087; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 
offen 2088; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 2089; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 2090; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 2091; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 2092; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 2093; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 2094; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 2095; CHECK-NEXT: s_waitcnt vmcnt(4) 2096; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 2097; CHECK-NEXT: s_waitcnt vmcnt(0) 2098; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 2099; CHECK-NEXT: s_setpc_b64 s[30:31] 2100entry: 2101 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false) 2102 ret void 2103} 2104 2105define void @memcpy_p1_p5_sz16_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { 2106; CHECK-LABEL: memcpy_p1_p5_sz16_align_16_16: 2107; CHECK: ; %bb.0: ; %entry 2108; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2109; CHECK-NEXT: s_clause 0x3 2110; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 2111; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 2112; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 2113; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 2114; CHECK-NEXT: s_waitcnt vmcnt(0) 2115; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 2116; CHECK-NEXT: s_setpc_b64 s[30:31] 2117entry: 2118 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false) 2119 ret void 2120} 2121 2122define void @memcpy_p1_p5_sz31_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { 2123; CHECK-LABEL: memcpy_p1_p5_sz31_align_16_16: 2124; 
CHECK: ; %bb.0: ; %entry 2125; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2126; CHECK-NEXT: s_clause 0x7 2127; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 2128; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 2129; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 2130; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 2131; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:15 2132; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:19 2133; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:23 2134; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:27 2135; CHECK-NEXT: s_waitcnt vmcnt(4) 2136; CHECK-NEXT: global_store_dwordx4 v[0:1], v[3:6], off 2137; CHECK-NEXT: s_waitcnt vmcnt(0) 2138; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:15 2139; CHECK-NEXT: s_setpc_b64 s[30:31] 2140entry: 2141 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false) 2142 ret void 2143} 2144 2145define void @memcpy_p1_p5_sz32_align_16_16(ptr addrspace(1) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { 2146; CHECK-LABEL: memcpy_p1_p5_sz32_align_16_16: 2147; CHECK: ; %bb.0: ; %entry 2148; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2149; CHECK-NEXT: s_clause 0x7 2150; CHECK-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen 2151; CHECK-NEXT: buffer_load_dword v4, v2, s[0:3], 0 offen offset:4 2152; CHECK-NEXT: buffer_load_dword v5, v2, s[0:3], 0 offen offset:8 2153; CHECK-NEXT: buffer_load_dword v6, v2, s[0:3], 0 offen offset:12 2154; CHECK-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:16 2155; CHECK-NEXT: buffer_load_dword v8, v2, s[0:3], 0 offen offset:20 2156; CHECK-NEXT: buffer_load_dword v9, v2, s[0:3], 0 offen offset:24 2157; CHECK-NEXT: buffer_load_dword v10, v2, s[0:3], 0 offen offset:28 2158; CHECK-NEXT: s_waitcnt vmcnt(4) 2159; CHECK-NEXT: 
global_store_dwordx4 v[0:1], v[3:6], off 2160; CHECK-NEXT: s_waitcnt vmcnt(0) 2161; CHECK-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:16 2162; CHECK-NEXT: s_setpc_b64 s[30:31] 2163entry: 2164 tail call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false) 2165 ret void 2166} 2167 2168define void @memcpy_p3_p0_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { 2169; CHECK-LABEL: memcpy_p3_p0_sz16_align_1_1: 2170; CHECK: ; %bb.0: ; %entry 2171; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2172; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 2173; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2174; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2175; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2176; CHECK-NEXT: s_setpc_b64 s[30:31] 2177entry: 2178 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false) 2179 ret void 2180} 2181 2182define void @memcpy_p3_p0_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { 2183; CHECK-LABEL: memcpy_p3_p0_sz31_align_1_1: 2184; CHECK: ; %bb.0: ; %entry 2185; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2186; CHECK-NEXT: s_clause 0x2 2187; CHECK-NEXT: flat_load_dwordx2 v[5:6], v[1:2] offset:23 2188; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16 2189; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 2190; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) 2191; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23 2192; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2) 2193; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 2194; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2) 2195; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2196; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2197; CHECK-NEXT: s_setpc_b64 s[30:31] 2198entry: 2199 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull 
align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false) 2200 ret void 2201} 2202 2203define void @memcpy_p3_p0_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { 2204; CHECK-LABEL: memcpy_p3_p0_sz32_align_1_1: 2205; CHECK: ; %bb.0: ; %entry 2206; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2207; CHECK-NEXT: s_clause 0x1 2208; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 2209; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] 2210; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 2211; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3 2212; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 2213; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1 2214; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2215; CHECK-NEXT: s_setpc_b64 s[30:31] 2216entry: 2217 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false) 2218 ret void 2219} 2220 2221define void @memcpy_p3_p0_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 2222; CHECK-LABEL: memcpy_p3_p0_sz16_align_2_2: 2223; CHECK: ; %bb.0: ; %entry 2224; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2225; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 2226; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2227; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2228; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2229; CHECK-NEXT: s_setpc_b64 s[30:31] 2230entry: 2231 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false) 2232 ret void 2233} 2234 2235define void @memcpy_p3_p0_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 2236; CHECK-LABEL: memcpy_p3_p0_sz31_align_2_2: 2237; CHECK: ; %bb.0: ; %entry 2238; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2239; CHECK-NEXT: s_clause 0x2 2240; CHECK-NEXT: 
flat_load_dwordx2 v[5:6], v[1:2] offset:23 2241; CHECK-NEXT: flat_load_dwordx2 v[7:8], v[1:2] offset:16 2242; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 2243; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) 2244; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23 2245; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(2) 2246; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 2247; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(2) 2248; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2249; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2250; CHECK-NEXT: s_setpc_b64 s[30:31] 2251entry: 2252 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false) 2253 ret void 2254} 2255 2256define void @memcpy_p3_p0_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 2257; CHECK-LABEL: memcpy_p3_p0_sz32_align_2_2: 2258; CHECK: ; %bb.0: ; %entry 2259; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2260; CHECK-NEXT: s_clause 0x1 2261; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 2262; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] 2263; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 2264; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3 2265; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 2266; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1 2267; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2268; CHECK-NEXT: s_setpc_b64 s[30:31] 2269entry: 2270 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false) 2271 ret void 2272} 2273 2274define void @memcpy_p3_p0_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { 2275; CHECK-LABEL: memcpy_p3_p0_sz16_align_8_8: 2276; CHECK: ; %bb.0: ; %entry 2277; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2278; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 2279; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2280; 
CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2281; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2282; CHECK-NEXT: s_setpc_b64 s[30:31] 2283entry: 2284 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false) 2285 ret void 2286} 2287 2288define void @memcpy_p3_p0_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { 2289; CHECK-LABEL: memcpy_p3_p0_sz31_align_8_8: 2290; CHECK: ; %bb.0: ; %entry 2291; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2292; CHECK-NEXT: s_clause 0x1 2293; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] 2294; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] offset:15 2295; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 2296; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2297; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 2298; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15 2299; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2300; CHECK-NEXT: s_setpc_b64 s[30:31] 2301entry: 2302 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false) 2303 ret void 2304} 2305 2306define void @memcpy_p3_p0_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { 2307; CHECK-LABEL: memcpy_p3_p0_sz32_align_8_8: 2308; CHECK: ; %bb.0: ; %entry 2309; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2310; CHECK-NEXT: s_clause 0x1 2311; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 2312; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] 2313; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 2314; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset0:2 offset1:3 2315; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 2316; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset1:1 2317; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2318; CHECK-NEXT: s_setpc_b64 s[30:31] 2319entry: 2320 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull 
align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false) 2321 ret void 2322} 2323 2324define void @memcpy_p3_p0_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 2325; CHECK-LABEL: memcpy_p3_p0_sz16_align_16_16: 2326; CHECK: ; %bb.0: ; %entry 2327; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2328; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 2329; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2330; CHECK-NEXT: ds_write_b128 v0, v[1:4] 2331; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2332; CHECK-NEXT: s_setpc_b64 s[30:31] 2333entry: 2334 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false) 2335 ret void 2336} 2337 2338define void @memcpy_p3_p0_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 2339; CHECK-LABEL: memcpy_p3_p0_sz31_align_16_16: 2340; CHECK: ; %bb.0: ; %entry 2341; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2342; CHECK-NEXT: s_clause 0x1 2343; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15 2344; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] 2345; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 2346; CHECK-NEXT: ds_write_b128 v0, v[3:6] offset:15 2347; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 2348; CHECK-NEXT: ds_write_b128 v0, v[7:10] 2349; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2350; CHECK-NEXT: s_setpc_b64 s[30:31] 2351entry: 2352 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false) 2353 ret void 2354} 2355 2356define void @memcpy_p3_p0_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 2357; CHECK-LABEL: memcpy_p3_p0_sz32_align_16_16: 2358; CHECK: ; %bb.0: ; %entry 2359; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2360; CHECK-NEXT: s_clause 0x1 2361; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 
2362; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] 2363; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 2364; CHECK-NEXT: ds_write_b128 v0, v[3:6] offset:16 2365; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) 2366; CHECK-NEXT: ds_write_b128 v0, v[7:10] 2367; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2368; CHECK-NEXT: s_setpc_b64 s[30:31] 2369entry: 2370 tail call void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false) 2371 ret void 2372} 2373 2374define void @memcpy_p3_p1_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 2375; CHECK-LABEL: memcpy_p3_p1_sz16_align_1_1: 2376; CHECK: ; %bb.0: ; %entry 2377; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2378; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 2379; CHECK-NEXT: s_waitcnt vmcnt(0) 2380; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2381; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2382; CHECK-NEXT: s_setpc_b64 s[30:31] 2383entry: 2384 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false) 2385 ret void 2386} 2387 2388define void @memcpy_p3_p1_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 2389; CHECK-LABEL: memcpy_p3_p1_sz31_align_1_1: 2390; CHECK: ; %bb.0: ; %entry 2391; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2392; CHECK-NEXT: s_clause 0x2 2393; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2394; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 2395; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 2396; CHECK-NEXT: s_waitcnt vmcnt(2) 2397; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2398; CHECK-NEXT: s_waitcnt vmcnt(1) 2399; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 2400; CHECK-NEXT: s_waitcnt vmcnt(0) 2401; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23 2402; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2403; 
CHECK-NEXT: s_setpc_b64 s[30:31] 2404entry: 2405 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false) 2406 ret void 2407} 2408 2409define void @memcpy_p3_p1_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 2410; CHECK-LABEL: memcpy_p3_p1_sz32_align_1_1: 2411; CHECK: ; %bb.0: ; %entry 2412; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2413; CHECK-NEXT: s_clause 0x1 2414; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2415; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 2416; CHECK-NEXT: s_waitcnt vmcnt(1) 2417; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2418; CHECK-NEXT: s_waitcnt vmcnt(0) 2419; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 2420; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2421; CHECK-NEXT: s_setpc_b64 s[30:31] 2422entry: 2423 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false) 2424 ret void 2425} 2426 2427define void @memcpy_p3_p1_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 2428; CHECK-LABEL: memcpy_p3_p1_sz16_align_2_2: 2429; CHECK: ; %bb.0: ; %entry 2430; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2431; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 2432; CHECK-NEXT: s_waitcnt vmcnt(0) 2433; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2434; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2435; CHECK-NEXT: s_setpc_b64 s[30:31] 2436entry: 2437 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false) 2438 ret void 2439} 2440 2441define void @memcpy_p3_p1_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 2442; CHECK-LABEL: memcpy_p3_p1_sz31_align_2_2: 2443; CHECK: ; %bb.0: ; %entry 2444; 
CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2445; CHECK-NEXT: s_clause 0x2 2446; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2447; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 2448; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 2449; CHECK-NEXT: s_waitcnt vmcnt(2) 2450; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2451; CHECK-NEXT: s_waitcnt vmcnt(1) 2452; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 2453; CHECK-NEXT: s_waitcnt vmcnt(0) 2454; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23 2455; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2456; CHECK-NEXT: s_setpc_b64 s[30:31] 2457entry: 2458 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false) 2459 ret void 2460} 2461 2462define void @memcpy_p3_p1_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 2463; CHECK-LABEL: memcpy_p3_p1_sz32_align_2_2: 2464; CHECK: ; %bb.0: ; %entry 2465; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2466; CHECK-NEXT: s_clause 0x1 2467; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2468; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 2469; CHECK-NEXT: s_waitcnt vmcnt(1) 2470; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2471; CHECK-NEXT: s_waitcnt vmcnt(0) 2472; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 2473; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2474; CHECK-NEXT: s_setpc_b64 s[30:31] 2475entry: 2476 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false) 2477 ret void 2478} 2479 2480define void @memcpy_p3_p1_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 2481; CHECK-LABEL: memcpy_p3_p1_sz16_align_8_8: 2482; CHECK: ; %bb.0: ; %entry 2483; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2484; CHECK-NEXT: 
global_load_dwordx4 v[1:4], v[1:2], off 2485; CHECK-NEXT: s_waitcnt vmcnt(0) 2486; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2487; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2488; CHECK-NEXT: s_setpc_b64 s[30:31] 2489entry: 2490 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false) 2491 ret void 2492} 2493 2494define void @memcpy_p3_p1_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 2495; CHECK-LABEL: memcpy_p3_p1_sz31_align_8_8: 2496; CHECK: ; %bb.0: ; %entry 2497; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2498; CHECK-NEXT: s_clause 0x1 2499; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2500; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 2501; CHECK-NEXT: s_waitcnt vmcnt(1) 2502; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2503; CHECK-NEXT: s_waitcnt vmcnt(0) 2504; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15 2505; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2506; CHECK-NEXT: s_setpc_b64 s[30:31] 2507entry: 2508 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false) 2509 ret void 2510} 2511 2512define void @memcpy_p3_p1_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 2513; CHECK-LABEL: memcpy_p3_p1_sz32_align_8_8: 2514; CHECK: ; %bb.0: ; %entry 2515; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2516; CHECK-NEXT: s_clause 0x1 2517; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2518; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 2519; CHECK-NEXT: s_waitcnt vmcnt(1) 2520; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2521; CHECK-NEXT: s_waitcnt vmcnt(0) 2522; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 2523; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2524; CHECK-NEXT: s_setpc_b64 s[30:31] 2525entry: 2526 tail 
call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false) 2527 ret void 2528} 2529 2530define void @memcpy_p3_p1_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 2531; CHECK-LABEL: memcpy_p3_p1_sz16_align_16_16: 2532; CHECK: ; %bb.0: ; %entry 2533; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2534; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 2535; CHECK-NEXT: s_waitcnt vmcnt(0) 2536; CHECK-NEXT: ds_write_b128 v0, v[1:4] 2537; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2538; CHECK-NEXT: s_setpc_b64 s[30:31] 2539entry: 2540 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false) 2541 ret void 2542} 2543 2544define void @memcpy_p3_p1_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 2545; CHECK-LABEL: memcpy_p3_p1_sz31_align_16_16: 2546; CHECK: ; %bb.0: ; %entry 2547; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2548; CHECK-NEXT: s_clause 0x1 2549; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2550; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 2551; CHECK-NEXT: s_waitcnt vmcnt(1) 2552; CHECK-NEXT: ds_write_b128 v0, v[3:6] 2553; CHECK-NEXT: s_waitcnt vmcnt(0) 2554; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15 2555; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2556; CHECK-NEXT: s_setpc_b64 s[30:31] 2557entry: 2558 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false) 2559 ret void 2560} 2561 2562define void @memcpy_p3_p1_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 2563; CHECK-LABEL: memcpy_p3_p1_sz32_align_16_16: 2564; CHECK: ; %bb.0: ; %entry 2565; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2566; CHECK-NEXT: s_clause 0x1 2567; 
CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2568; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 2569; CHECK-NEXT: s_waitcnt vmcnt(1) 2570; CHECK-NEXT: ds_write_b128 v0, v[3:6] 2571; CHECK-NEXT: s_waitcnt vmcnt(0) 2572; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:16 2573; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2574; CHECK-NEXT: s_setpc_b64 s[30:31] 2575entry: 2576 tail call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false) 2577 ret void 2578} 2579 2580define void @memcpy_p3_p3_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 2581; CHECK-LABEL: memcpy_p3_p3_sz16_align_1_1: 2582; CHECK: ; %bb.0: ; %entry 2583; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2584; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 2585; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2586; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2587; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2588; CHECK-NEXT: s_setpc_b64 s[30:31] 2589entry: 2590 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false) 2591 ret void 2592} 2593 2594define void @memcpy_p3_p3_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 2595; CHECK-LABEL: memcpy_p3_p3_sz31_align_1_1: 2596; CHECK: ; %bb.0: ; %entry 2597; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2598; CHECK-NEXT: ds_read_b64 v[5:6], v1 offset:23 2599; CHECK-NEXT: ds_read_b64 v[7:8], v1 offset:16 2600; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 2601; CHECK-NEXT: s_waitcnt lgkmcnt(2) 2602; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23 2603; CHECK-NEXT: s_waitcnt lgkmcnt(2) 2604; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 2605; CHECK-NEXT: s_waitcnt lgkmcnt(2) 2606; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2607; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2608; CHECK-NEXT: s_setpc_b64 
s[30:31] 2609entry: 2610 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false) 2611 ret void 2612} 2613 2614define void @memcpy_p3_p3_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 2615; CHECK-LABEL: memcpy_p3_p3_sz32_align_1_1: 2616; CHECK: ; %bb.0: ; %entry 2617; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2618; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3 2619; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1 2620; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2621; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3 2622; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2623; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1 2624; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2625; CHECK-NEXT: s_setpc_b64 s[30:31] 2626entry: 2627 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false) 2628 ret void 2629} 2630 2631define void @memcpy_p3_p3_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { 2632; CHECK-LABEL: memcpy_p3_p3_sz16_align_2_2: 2633; CHECK: ; %bb.0: ; %entry 2634; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2635; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 2636; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2637; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2638; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2639; CHECK-NEXT: s_setpc_b64 s[30:31] 2640entry: 2641 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false) 2642 ret void 2643} 2644 2645define void @memcpy_p3_p3_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { 2646; CHECK-LABEL: memcpy_p3_p3_sz31_align_2_2: 2647; CHECK: ; %bb.0: ; %entry 2648; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2649; CHECK-NEXT: 
ds_read_b64 v[5:6], v1 offset:23 2650; CHECK-NEXT: ds_read_b64 v[7:8], v1 offset:16 2651; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 2652; CHECK-NEXT: s_waitcnt lgkmcnt(2) 2653; CHECK-NEXT: ds_write_b64 v0, v[5:6] offset:23 2654; CHECK-NEXT: s_waitcnt lgkmcnt(2) 2655; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 2656; CHECK-NEXT: s_waitcnt lgkmcnt(2) 2657; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2658; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2659; CHECK-NEXT: s_setpc_b64 s[30:31] 2660entry: 2661 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false) 2662 ret void 2663} 2664 2665define void @memcpy_p3_p3_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { 2666; CHECK-LABEL: memcpy_p3_p3_sz32_align_2_2: 2667; CHECK: ; %bb.0: ; %entry 2668; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2669; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3 2670; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1 2671; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2672; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3 2673; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2674; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1 2675; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2676; CHECK-NEXT: s_setpc_b64 s[30:31] 2677entry: 2678 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false) 2679 ret void 2680} 2681 2682define void @memcpy_p3_p3_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 2683; CHECK-LABEL: memcpy_p3_p3_sz16_align_8_8: 2684; CHECK: ; %bb.0: ; %entry 2685; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2686; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 2687; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2688; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2689; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2690; CHECK-NEXT: 
s_setpc_b64 s[30:31] 2691entry: 2692 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false) 2693 ret void 2694} 2695 2696define void @memcpy_p3_p3_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 2697; CHECK-LABEL: memcpy_p3_p3_sz31_align_8_8: 2698; CHECK: ; %bb.0: ; %entry 2699; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2700; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 2701; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15 2702; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2703; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 2704; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2705; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:15 2706; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2707; CHECK-NEXT: s_setpc_b64 s[30:31] 2708entry: 2709 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false) 2710 ret void 2711} 2712 2713define void @memcpy_p3_p3_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 2714; CHECK-LABEL: memcpy_p3_p3_sz32_align_8_8: 2715; CHECK: ; %bb.0: ; %entry 2716; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2717; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset0:2 offset1:3 2718; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset1:1 2719; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2720; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset0:2 offset1:3 2721; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2722; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1 2723; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2724; CHECK-NEXT: s_setpc_b64 s[30:31] 2725entry: 2726 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false) 2727 ret void 2728} 2729 2730define void @memcpy_p3_p3_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) 
{ 2731; CHECK-LABEL: memcpy_p3_p3_sz16_align_16_16: 2732; CHECK: ; %bb.0: ; %entry 2733; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2734; CHECK-NEXT: ds_read_b128 v[1:4], v1 2735; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2736; CHECK-NEXT: ds_write_b128 v0, v[1:4] 2737; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2738; CHECK-NEXT: s_setpc_b64 s[30:31] 2739entry: 2740 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false) 2741 ret void 2742} 2743 2744define void @memcpy_p3_p3_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { 2745; CHECK-LABEL: memcpy_p3_p3_sz31_align_16_16: 2746; CHECK: ; %bb.0: ; %entry 2747; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2748; CHECK-NEXT: ds_read_b128 v[2:5], v1 offset:15 2749; CHECK-NEXT: ds_read_b128 v[6:9], v1 2750; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2751; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:15 2752; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2753; CHECK-NEXT: ds_write_b128 v0, v[6:9] 2754; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2755; CHECK-NEXT: s_setpc_b64 s[30:31] 2756entry: 2757 tail call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false) 2758 ret void 2759} 2760 2761define void @memcpy_p3_p3_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { 2762; CHECK-LABEL: memcpy_p3_p3_sz32_align_16_16: 2763; CHECK: ; %bb.0: ; %entry 2764; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2765; CHECK-NEXT: ds_read_b128 v[2:5], v1 offset:16 2766; CHECK-NEXT: ds_read_b128 v[6:9], v1 2767; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2768; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:16 2769; CHECK-NEXT: s_waitcnt lgkmcnt(1) 2770; CHECK-NEXT: ds_write_b128 v0, v[6:9] 2771; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2772; CHECK-NEXT: s_setpc_b64 s[30:31] 2773entry: 2774 tail call void 
@llvm.memcpy.p3.p3.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false) 2775 ret void 2776} 2777 2778define void @memcpy_p3_p4_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 2779; CHECK-LABEL: memcpy_p3_p4_sz16_align_1_1: 2780; CHECK: ; %bb.0: ; %entry 2781; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2782; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 2783; CHECK-NEXT: s_waitcnt vmcnt(0) 2784; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2785; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2786; CHECK-NEXT: s_setpc_b64 s[30:31] 2787entry: 2788 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) 2789 ret void 2790} 2791 2792define void @memcpy_p3_p4_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 2793; CHECK-LABEL: memcpy_p3_p4_sz31_align_1_1: 2794; CHECK: ; %bb.0: ; %entry 2795; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2796; CHECK-NEXT: s_clause 0x2 2797; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2798; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 2799; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 2800; CHECK-NEXT: s_waitcnt vmcnt(2) 2801; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2802; CHECK-NEXT: s_waitcnt vmcnt(1) 2803; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 2804; CHECK-NEXT: s_waitcnt vmcnt(0) 2805; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23 2806; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2807; CHECK-NEXT: s_setpc_b64 s[30:31] 2808entry: 2809 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false) 2810 ret void 2811} 2812 2813define void @memcpy_p3_p4_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 2814; 
CHECK-LABEL: memcpy_p3_p4_sz32_align_1_1: 2815; CHECK: ; %bb.0: ; %entry 2816; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2817; CHECK-NEXT: s_clause 0x1 2818; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2819; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 2820; CHECK-NEXT: s_waitcnt vmcnt(1) 2821; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2822; CHECK-NEXT: s_waitcnt vmcnt(0) 2823; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 2824; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2825; CHECK-NEXT: s_setpc_b64 s[30:31] 2826entry: 2827 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false) 2828 ret void 2829} 2830 2831define void @memcpy_p3_p4_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 2832; CHECK-LABEL: memcpy_p3_p4_sz16_align_2_2: 2833; CHECK: ; %bb.0: ; %entry 2834; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2835; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 2836; CHECK-NEXT: s_waitcnt vmcnt(0) 2837; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2838; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2839; CHECK-NEXT: s_setpc_b64 s[30:31] 2840entry: 2841 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false) 2842 ret void 2843} 2844 2845define void @memcpy_p3_p4_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 2846; CHECK-LABEL: memcpy_p3_p4_sz31_align_2_2: 2847; CHECK: ; %bb.0: ; %entry 2848; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2849; CHECK-NEXT: s_clause 0x2 2850; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2851; CHECK-NEXT: global_load_dwordx2 v[7:8], v[1:2], off offset:16 2852; CHECK-NEXT: global_load_dwordx2 v[1:2], v[1:2], off offset:23 2853; CHECK-NEXT: s_waitcnt vmcnt(2) 2854; CHECK-NEXT: ds_write2_b64 v0, 
v[3:4], v[5:6] offset1:1 2855; CHECK-NEXT: s_waitcnt vmcnt(1) 2856; CHECK-NEXT: ds_write_b64 v0, v[7:8] offset:16 2857; CHECK-NEXT: s_waitcnt vmcnt(0) 2858; CHECK-NEXT: ds_write_b64 v0, v[1:2] offset:23 2859; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2860; CHECK-NEXT: s_setpc_b64 s[30:31] 2861entry: 2862 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false) 2863 ret void 2864} 2865 2866define void @memcpy_p3_p4_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 2867; CHECK-LABEL: memcpy_p3_p4_sz32_align_2_2: 2868; CHECK: ; %bb.0: ; %entry 2869; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2870; CHECK-NEXT: s_clause 0x1 2871; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2872; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 2873; CHECK-NEXT: s_waitcnt vmcnt(1) 2874; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2875; CHECK-NEXT: s_waitcnt vmcnt(0) 2876; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 2877; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2878; CHECK-NEXT: s_setpc_b64 s[30:31] 2879entry: 2880 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false) 2881 ret void 2882} 2883 2884define void @memcpy_p3_p4_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { 2885; CHECK-LABEL: memcpy_p3_p4_sz16_align_8_8: 2886; CHECK: ; %bb.0: ; %entry 2887; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2888; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 2889; CHECK-NEXT: s_waitcnt vmcnt(0) 2890; CHECK-NEXT: ds_write2_b64 v0, v[1:2], v[3:4] offset1:1 2891; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2892; CHECK-NEXT: s_setpc_b64 s[30:31] 2893entry: 2894 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, 
i64 16, i1 false) 2895 ret void 2896} 2897 2898define void @memcpy_p3_p4_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { 2899; CHECK-LABEL: memcpy_p3_p4_sz31_align_8_8: 2900; CHECK: ; %bb.0: ; %entry 2901; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2902; CHECK-NEXT: s_clause 0x1 2903; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2904; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 2905; CHECK-NEXT: s_waitcnt vmcnt(1) 2906; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2907; CHECK-NEXT: s_waitcnt vmcnt(0) 2908; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15 2909; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2910; CHECK-NEXT: s_setpc_b64 s[30:31] 2911entry: 2912 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false) 2913 ret void 2914} 2915 2916define void @memcpy_p3_p4_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { 2917; CHECK-LABEL: memcpy_p3_p4_sz32_align_8_8: 2918; CHECK: ; %bb.0: ; %entry 2919; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2920; CHECK-NEXT: s_clause 0x1 2921; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2922; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 2923; CHECK-NEXT: s_waitcnt vmcnt(1) 2924; CHECK-NEXT: ds_write2_b64 v0, v[3:4], v[5:6] offset1:1 2925; CHECK-NEXT: s_waitcnt vmcnt(0) 2926; CHECK-NEXT: ds_write2_b64 v0, v[7:8], v[9:10] offset0:2 offset1:3 2927; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2928; CHECK-NEXT: s_setpc_b64 s[30:31] 2929entry: 2930 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false) 2931 ret void 2932} 2933 2934define void @memcpy_p3_p4_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 2935; CHECK-LABEL: memcpy_p3_p4_sz16_align_16_16: 2936; CHECK: ; %bb.0: ; 
%entry 2937; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2938; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 2939; CHECK-NEXT: s_waitcnt vmcnt(0) 2940; CHECK-NEXT: ds_write_b128 v0, v[1:4] 2941; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2942; CHECK-NEXT: s_setpc_b64 s[30:31] 2943entry: 2944 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false) 2945 ret void 2946} 2947 2948define void @memcpy_p3_p4_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 2949; CHECK-LABEL: memcpy_p3_p4_sz31_align_16_16: 2950; CHECK: ; %bb.0: ; %entry 2951; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2952; CHECK-NEXT: s_clause 0x1 2953; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2954; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 2955; CHECK-NEXT: s_waitcnt vmcnt(1) 2956; CHECK-NEXT: ds_write_b128 v0, v[3:6] 2957; CHECK-NEXT: s_waitcnt vmcnt(0) 2958; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:15 2959; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2960; CHECK-NEXT: s_setpc_b64 s[30:31] 2961entry: 2962 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false) 2963 ret void 2964} 2965 2966define void @memcpy_p3_p4_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 2967; CHECK-LABEL: memcpy_p3_p4_sz32_align_16_16: 2968; CHECK: ; %bb.0: ; %entry 2969; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2970; CHECK-NEXT: s_clause 0x1 2971; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 2972; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 2973; CHECK-NEXT: s_waitcnt vmcnt(1) 2974; CHECK-NEXT: ds_write_b128 v0, v[3:6] 2975; CHECK-NEXT: s_waitcnt vmcnt(0) 2976; CHECK-NEXT: ds_write_b128 v0, v[7:10] offset:16 2977; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2978; CHECK-NEXT: s_setpc_b64 
s[30:31] 2979entry: 2980 tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false) 2981 ret void 2982} 2983 2984define void @memcpy_p3_p5_sz16_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 2985; CHECK-LABEL: memcpy_p3_p5_sz16_align_1_1: 2986; CHECK: ; %bb.0: ; %entry 2987; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2988; CHECK-NEXT: s_clause 0x3 2989; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 2990; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 2991; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 2992; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 2993; CHECK-NEXT: s_waitcnt vmcnt(0) 2994; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 2995; CHECK-NEXT: s_waitcnt lgkmcnt(0) 2996; CHECK-NEXT: s_setpc_b64 s[30:31] 2997entry: 2998 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false) 2999 ret void 3000} 3001 3002define void @memcpy_p3_p5_sz31_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 3003; CHECK-LABEL: memcpy_p3_p5_sz31_align_1_1: 3004; CHECK: ; %bb.0: ; %entry 3005; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3006; CHECK-NEXT: s_clause 0x7 3007; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 3008; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 3009; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 3010; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 3011; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 3012; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 3013; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23 3014; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27 3015; CHECK-NEXT: s_waitcnt vmcnt(4) 3016; 
CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 3017; CHECK-NEXT: s_waitcnt vmcnt(2) 3018; CHECK-NEXT: ds_write_b64 v0, v[6:7] offset:16 3019; CHECK-NEXT: s_waitcnt vmcnt(0) 3020; CHECK-NEXT: ds_write_b64 v0, v[8:9] offset:23 3021; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3022; CHECK-NEXT: s_setpc_b64 s[30:31] 3023entry: 3024 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false) 3025 ret void 3026} 3027 3028define void @memcpy_p3_p5_sz32_align_1_1(ptr addrspace(3) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 3029; CHECK-LABEL: memcpy_p3_p5_sz32_align_1_1: 3030; CHECK: ; %bb.0: ; %entry 3031; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3032; CHECK-NEXT: s_clause 0x7 3033; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 3034; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 3035; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 3036; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 3037; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 3038; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 3039; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24 3040; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28 3041; CHECK-NEXT: s_waitcnt vmcnt(4) 3042; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 3043; CHECK-NEXT: s_waitcnt vmcnt(0) 3044; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3 3045; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3046; CHECK-NEXT: s_setpc_b64 s[30:31] 3047entry: 3048 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false) 3049 ret void 3050} 3051 3052define void @memcpy_p3_p5_sz16_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 3053; CHECK-LABEL: memcpy_p3_p5_sz16_align_2_2: 3054; CHECK: ; %bb.0: 
; %entry 3055; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3056; CHECK-NEXT: s_clause 0x3 3057; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 3058; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 3059; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 3060; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 3061; CHECK-NEXT: s_waitcnt vmcnt(0) 3062; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 3063; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3064; CHECK-NEXT: s_setpc_b64 s[30:31] 3065entry: 3066 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false) 3067 ret void 3068} 3069 3070define void @memcpy_p3_p5_sz31_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 3071; CHECK-LABEL: memcpy_p3_p5_sz31_align_2_2: 3072; CHECK: ; %bb.0: ; %entry 3073; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3074; CHECK-NEXT: s_clause 0x7 3075; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 3076; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 3077; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 3078; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 3079; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 3080; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 3081; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23 3082; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27 3083; CHECK-NEXT: s_waitcnt vmcnt(4) 3084; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 3085; CHECK-NEXT: s_waitcnt vmcnt(2) 3086; CHECK-NEXT: ds_write_b64 v0, v[6:7] offset:16 3087; CHECK-NEXT: s_waitcnt vmcnt(0) 3088; CHECK-NEXT: ds_write_b64 v0, v[8:9] offset:23 3089; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3090; CHECK-NEXT: s_setpc_b64 s[30:31] 3091entry: 3092 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef 
nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false) 3093 ret void 3094} 3095 3096define void @memcpy_p3_p5_sz32_align_2_2(ptr addrspace(3) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 3097; CHECK-LABEL: memcpy_p3_p5_sz32_align_2_2: 3098; CHECK: ; %bb.0: ; %entry 3099; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3100; CHECK-NEXT: s_clause 0x7 3101; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 3102; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 3103; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 3104; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 3105; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 3106; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 3107; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24 3108; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28 3109; CHECK-NEXT: s_waitcnt vmcnt(4) 3110; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 3111; CHECK-NEXT: s_waitcnt vmcnt(0) 3112; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3 3113; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3114; CHECK-NEXT: s_setpc_b64 s[30:31] 3115entry: 3116 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false) 3117 ret void 3118} 3119 3120define void @memcpy_p3_p5_sz16_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { 3121; CHECK-LABEL: memcpy_p3_p5_sz16_align_8_8: 3122; CHECK: ; %bb.0: ; %entry 3123; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3124; CHECK-NEXT: s_clause 0x3 3125; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 3126; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 3127; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 3128; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 3129; CHECK-NEXT: 
s_waitcnt vmcnt(0) 3130; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 3131; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3132; CHECK-NEXT: s_setpc_b64 s[30:31] 3133entry: 3134 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false) 3135 ret void 3136} 3137 3138define void @memcpy_p3_p5_sz31_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { 3139; CHECK-LABEL: memcpy_p3_p5_sz31_align_8_8: 3140; CHECK: ; %bb.0: ; %entry 3141; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3142; CHECK-NEXT: s_clause 0x7 3143; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen 3144; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:4 3145; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:8 3146; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:12 3147; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:15 3148; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:19 3149; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:23 3150; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:27 3151; CHECK-NEXT: s_waitcnt vmcnt(4) 3152; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset1:1 3153; CHECK-NEXT: s_waitcnt vmcnt(0) 3154; CHECK-NEXT: ds_write_b128 v0, v[2:5] offset:15 3155; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3156; CHECK-NEXT: s_setpc_b64 s[30:31] 3157entry: 3158 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false) 3159 ret void 3160} 3161 3162define void @memcpy_p3_p5_sz32_align_8_8(ptr addrspace(3) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { 3163; CHECK-LABEL: memcpy_p3_p5_sz32_align_8_8: 3164; CHECK: ; %bb.0: ; %entry 3165; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3166; CHECK-NEXT: s_clause 0x7 3167; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 3168; 
CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 3169; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 3170; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 3171; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 3172; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 3173; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24 3174; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28 3175; CHECK-NEXT: s_waitcnt vmcnt(4) 3176; CHECK-NEXT: ds_write2_b64 v0, v[2:3], v[4:5] offset1:1 3177; CHECK-NEXT: s_waitcnt vmcnt(0) 3178; CHECK-NEXT: ds_write2_b64 v0, v[6:7], v[8:9] offset0:2 offset1:3 3179; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3180; CHECK-NEXT: s_setpc_b64 s[30:31] 3181entry: 3182 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false) 3183 ret void 3184} 3185 3186define void @memcpy_p3_p5_sz16_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { 3187; CHECK-LABEL: memcpy_p3_p5_sz16_align_16_16: 3188; CHECK: ; %bb.0: ; %entry 3189; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3190; CHECK-NEXT: s_clause 0x3 3191; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 3192; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 3193; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 3194; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 3195; CHECK-NEXT: s_waitcnt vmcnt(0) 3196; CHECK-NEXT: ds_write_b128 v0, v[2:5] 3197; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3198; CHECK-NEXT: s_setpc_b64 s[30:31] 3199entry: 3200 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false) 3201 ret void 3202} 3203 3204define void @memcpy_p3_p5_sz31_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { 3205; 
CHECK-LABEL: memcpy_p3_p5_sz31_align_16_16: 3206; CHECK: ; %bb.0: ; %entry 3207; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3208; CHECK-NEXT: s_clause 0x7 3209; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 3210; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 3211; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 3212; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 3213; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:15 3214; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:19 3215; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:23 3216; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:27 3217; CHECK-NEXT: s_waitcnt vmcnt(4) 3218; CHECK-NEXT: ds_write_b128 v0, v[2:5] 3219; CHECK-NEXT: s_waitcnt vmcnt(0) 3220; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:15 3221; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3222; CHECK-NEXT: s_setpc_b64 s[30:31] 3223entry: 3224 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false) 3225 ret void 3226} 3227 3228define void @memcpy_p3_p5_sz32_align_16_16(ptr addrspace(3) align 16 %dst, ptr addrspace(5) align 16 readonly %src) { 3229; CHECK-LABEL: memcpy_p3_p5_sz32_align_16_16: 3230; CHECK: ; %bb.0: ; %entry 3231; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3232; CHECK-NEXT: s_clause 0x7 3233; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 3234; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 3235; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 3236; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:12 3237; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:16 3238; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:20 3239; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen offset:24 3240; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:28 3241; 
CHECK-NEXT: s_waitcnt vmcnt(4) 3242; CHECK-NEXT: ds_write_b128 v0, v[2:5] 3243; CHECK-NEXT: s_waitcnt vmcnt(0) 3244; CHECK-NEXT: ds_write_b128 v0, v[6:9] offset:16 3245; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3246; CHECK-NEXT: s_setpc_b64 s[30:31] 3247entry: 3248 tail call void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false) 3249 ret void 3250} 3251 3252define void @memcpy_p5_p0_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { 3253; CHECK-LABEL: memcpy_p5_p0_sz16_align_1_1: 3254; CHECK: ; %bb.0: ; %entry 3255; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3256; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 3257; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3258; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3259; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3260; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3261; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3262; CHECK-NEXT: s_setpc_b64 s[30:31] 3263entry: 3264 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 16, i1 false) 3265 ret void 3266} 3267 3268define void @memcpy_p5_p0_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { 3269; CHECK-LABEL: memcpy_p5_p0_sz31_align_1_1: 3270; CHECK: ; %bb.0: ; %entry 3271; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3272; CHECK-NEXT: s_clause 0x3 3273; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30 3274; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28 3275; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16 3276; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 3277; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) 3278; CHECK-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:30 3279; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) 3280; CHECK-NEXT: 
buffer_store_short v9, v0, s[0:3], 0 offen offset:28 3281; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 3282; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 3283; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 3284; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 3285; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3286; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3287; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3288; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3289; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3290; CHECK-NEXT: s_setpc_b64 s[30:31] 3291entry: 3292 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 31, i1 false) 3293 ret void 3294} 3295 3296define void @memcpy_p5_p0_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(0) align 1 readonly %src) { 3297; CHECK-LABEL: memcpy_p5_p0_sz32_align_1_1: 3298; CHECK: ; %bb.0: ; %entry 3299; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3300; CHECK-NEXT: s_clause 0x1 3301; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 3302; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] 3303; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 3304; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28 3305; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24 3306; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20 3307; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16 3308; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3309; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 3310; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 3311; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 3312; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen 3313; CHECK-NEXT: s_setpc_b64 s[30:31] 3314entry: 3315 tail call void 
@llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 32, i1 false) 3316 ret void 3317} 3318 3319define void @memcpy_p5_p0_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 3320; CHECK-LABEL: memcpy_p5_p0_sz16_align_2_2: 3321; CHECK: ; %bb.0: ; %entry 3322; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3323; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 3324; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3325; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3326; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3327; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3328; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3329; CHECK-NEXT: s_setpc_b64 s[30:31] 3330entry: 3331 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 16, i1 false) 3332 ret void 3333} 3334 3335define void @memcpy_p5_p0_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 3336; CHECK-LABEL: memcpy_p5_p0_sz31_align_2_2: 3337; CHECK: ; %bb.0: ; %entry 3338; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3339; CHECK-NEXT: s_clause 0x3 3340; CHECK-NEXT: flat_load_ubyte v8, v[1:2] offset:30 3341; CHECK-NEXT: flat_load_ushort v9, v[1:2] offset:28 3342; CHECK-NEXT: flat_load_dwordx3 v[5:7], v[1:2] offset:16 3343; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 3344; CHECK-NEXT: s_waitcnt vmcnt(3) lgkmcnt(3) 3345; CHECK-NEXT: buffer_store_byte v8, v0, s[0:3], 0 offen offset:30 3346; CHECK-NEXT: s_waitcnt vmcnt(2) lgkmcnt(2) 3347; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28 3348; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 3349; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 3350; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 3351; CHECK-NEXT: buffer_store_dword v5, v0, 
s[0:3], 0 offen offset:16 3352; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3353; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3354; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3355; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3356; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3357; CHECK-NEXT: s_setpc_b64 s[30:31] 3358entry: 3359 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 31, i1 false) 3360 ret void 3361} 3362 3363define void @memcpy_p5_p0_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(0) align 2 readonly %src) { 3364; CHECK-LABEL: memcpy_p5_p0_sz32_align_2_2: 3365; CHECK: ; %bb.0: ; %entry 3366; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3367; CHECK-NEXT: s_clause 0x1 3368; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 3369; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] 3370; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 3371; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28 3372; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24 3373; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20 3374; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16 3375; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3376; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 3377; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 3378; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 3379; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen 3380; CHECK-NEXT: s_setpc_b64 s[30:31] 3381entry: 3382 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(0) noundef nonnull align 2 %src, i64 32, i1 false) 3383 ret void 3384} 3385 3386define void @memcpy_p5_p0_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { 3387; CHECK-LABEL: 
memcpy_p5_p0_sz16_align_8_8: 3388; CHECK: ; %bb.0: ; %entry 3389; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3390; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 3391; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3392; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3393; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3394; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3395; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3396; CHECK-NEXT: s_setpc_b64 s[30:31] 3397entry: 3398 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 16, i1 false) 3399 ret void 3400} 3401 3402define void @memcpy_p5_p0_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { 3403; CHECK-LABEL: memcpy_p5_p0_sz31_align_8_8: 3404; CHECK: ; %bb.0: ; %entry 3405; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3406; CHECK-NEXT: s_clause 0x1 3407; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15 3408; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] 3409; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 3410; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27 3411; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23 3412; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:19 3413; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:15 3414; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3415; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 3416; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 3417; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 3418; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen 3419; CHECK-NEXT: s_setpc_b64 s[30:31] 3420entry: 3421 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 31, i1 false) 3422 ret void 3423} 
3424 3425define void @memcpy_p5_p0_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(0) align 8 readonly %src) { 3426; CHECK-LABEL: memcpy_p5_p0_sz32_align_8_8: 3427; CHECK: ; %bb.0: ; %entry 3428; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3429; CHECK-NEXT: s_clause 0x1 3430; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 3431; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] 3432; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 3433; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28 3434; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:24 3435; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20 3436; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16 3437; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3438; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 3439; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 3440; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 3441; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen 3442; CHECK-NEXT: s_setpc_b64 s[30:31] 3443entry: 3444 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(0) noundef nonnull align 8 %src, i64 32, i1 false) 3445 ret void 3446} 3447 3448define void @memcpy_p5_p0_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 3449; CHECK-LABEL: memcpy_p5_p0_sz16_align_16_16: 3450; CHECK: ; %bb.0: ; %entry 3451; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3452; CHECK-NEXT: flat_load_dwordx4 v[1:4], v[1:2] 3453; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3454; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3455; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3456; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3457; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3458; CHECK-NEXT: s_setpc_b64 s[30:31] 3459entry: 3460 tail call void 
@llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 16, i1 false) 3461 ret void 3462} 3463 3464define void @memcpy_p5_p0_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 3465; CHECK-LABEL: memcpy_p5_p0_sz31_align_16_16: 3466; CHECK: ; %bb.0: ; %entry 3467; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3468; CHECK-NEXT: s_clause 0x1 3469; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:15 3470; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] 3471; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 3472; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:27 3473; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:23 3474; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:19 3475; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:15 3476; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3477; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 3478; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 3479; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 3480; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen 3481; CHECK-NEXT: s_setpc_b64 s[30:31] 3482entry: 3483 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 31, i1 false) 3484 ret void 3485} 3486 3487define void @memcpy_p5_p0_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(0) align 16 readonly %src) { 3488; CHECK-LABEL: memcpy_p5_p0_sz32_align_16_16: 3489; CHECK: ; %bb.0: ; %entry 3490; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3491; CHECK-NEXT: s_clause 0x1 3492; CHECK-NEXT: flat_load_dwordx4 v[3:6], v[1:2] offset:16 3493; CHECK-NEXT: flat_load_dwordx4 v[7:10], v[1:2] 3494; CHECK-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) 3495; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:28 3496; CHECK-NEXT: 
buffer_store_dword v5, v0, s[0:3], 0 offen offset:24 3497; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:20 3498; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:16 3499; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 3500; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:12 3501; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:8 3502; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:4 3503; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen 3504; CHECK-NEXT: s_setpc_b64 s[30:31] 3505entry: 3506 tail call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(0) noundef nonnull align 16 %src, i64 32, i1 false) 3507 ret void 3508} 3509 3510define void @memcpy_p5_p1_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 3511; CHECK-LABEL: memcpy_p5_p1_sz16_align_1_1: 3512; CHECK: ; %bb.0: ; %entry 3513; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3514; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 3515; CHECK-NEXT: s_waitcnt vmcnt(0) 3516; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3517; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3518; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3519; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3520; CHECK-NEXT: s_setpc_b64 s[30:31] 3521entry: 3522 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 16, i1 false) 3523 ret void 3524} 3525 3526define void @memcpy_p5_p1_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 3527; CHECK-LABEL: memcpy_p5_p1_sz31_align_1_1: 3528; CHECK: ; %bb.0: ; %entry 3529; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3530; CHECK-NEXT: s_clause 0x3 3531; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16 3532; CHECK-NEXT: global_load_ushort v8, 
v[1:2], off offset:28 3533; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 3534; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 3535; CHECK-NEXT: s_waitcnt vmcnt(3) 3536; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 3537; CHECK-NEXT: s_waitcnt vmcnt(2) 3538; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28 3539; CHECK-NEXT: s_waitcnt vmcnt(1) 3540; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 3541; CHECK-NEXT: s_waitcnt vmcnt(0) 3542; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3543; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3544; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3545; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3546; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 3547; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 3548; CHECK-NEXT: s_setpc_b64 s[30:31] 3549entry: 3550 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 31, i1 false) 3551 ret void 3552} 3553 3554define void @memcpy_p5_p1_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(1) align 1 readonly %src) { 3555; CHECK-LABEL: memcpy_p5_p1_sz32_align_1_1: 3556; CHECK: ; %bb.0: ; %entry 3557; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3558; CHECK-NEXT: s_clause 0x1 3559; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 3560; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 3561; CHECK-NEXT: s_waitcnt vmcnt(1) 3562; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 3563; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 3564; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 3565; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 3566; CHECK-NEXT: s_waitcnt vmcnt(0) 3567; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 3568; CHECK-NEXT: 
buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 3569; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 3570; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 3571; CHECK-NEXT: s_setpc_b64 s[30:31] 3572entry: 3573 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 32, i1 false) 3574 ret void 3575} 3576 3577define void @memcpy_p5_p1_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 3578; CHECK-LABEL: memcpy_p5_p1_sz16_align_2_2: 3579; CHECK: ; %bb.0: ; %entry 3580; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3581; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 3582; CHECK-NEXT: s_waitcnt vmcnt(0) 3583; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3584; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3585; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3586; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3587; CHECK-NEXT: s_setpc_b64 s[30:31] 3588entry: 3589 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 16, i1 false) 3590 ret void 3591} 3592 3593define void @memcpy_p5_p1_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 3594; CHECK-LABEL: memcpy_p5_p1_sz31_align_2_2: 3595; CHECK: ; %bb.0: ; %entry 3596; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3597; CHECK-NEXT: s_clause 0x3 3598; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16 3599; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28 3600; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 3601; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 3602; CHECK-NEXT: s_waitcnt vmcnt(3) 3603; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 3604; CHECK-NEXT: s_waitcnt vmcnt(2) 3605; CHECK-NEXT: buffer_store_short 
v8, v0, s[0:3], 0 offen offset:28 3606; CHECK-NEXT: s_waitcnt vmcnt(1) 3607; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 3608; CHECK-NEXT: s_waitcnt vmcnt(0) 3609; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3610; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3611; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3612; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3613; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 3614; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 3615; CHECK-NEXT: s_setpc_b64 s[30:31] 3616entry: 3617 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 31, i1 false) 3618 ret void 3619} 3620 3621define void @memcpy_p5_p1_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(1) align 2 readonly %src) { 3622; CHECK-LABEL: memcpy_p5_p1_sz32_align_2_2: 3623; CHECK: ; %bb.0: ; %entry 3624; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3625; CHECK-NEXT: s_clause 0x1 3626; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 3627; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 3628; CHECK-NEXT: s_waitcnt vmcnt(1) 3629; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 3630; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 3631; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 3632; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 3633; CHECK-NEXT: s_waitcnt vmcnt(0) 3634; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 3635; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 3636; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 3637; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 3638; CHECK-NEXT: s_setpc_b64 s[30:31] 3639entry: 3640 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 2 
%dst, ptr addrspace(1) noundef nonnull align 2 %src, i64 32, i1 false) 3641 ret void 3642} 3643 3644define void @memcpy_p5_p1_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 3645; CHECK-LABEL: memcpy_p5_p1_sz16_align_8_8: 3646; CHECK: ; %bb.0: ; %entry 3647; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3648; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 3649; CHECK-NEXT: s_waitcnt vmcnt(0) 3650; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3651; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3652; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3653; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3654; CHECK-NEXT: s_setpc_b64 s[30:31] 3655entry: 3656 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 16, i1 false) 3657 ret void 3658} 3659 3660define void @memcpy_p5_p1_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 3661; CHECK-LABEL: memcpy_p5_p1_sz31_align_8_8: 3662; CHECK: ; %bb.0: ; %entry 3663; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3664; CHECK-NEXT: s_clause 0x1 3665; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 3666; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 3667; CHECK-NEXT: s_waitcnt vmcnt(1) 3668; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 3669; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 3670; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 3671; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 3672; CHECK-NEXT: s_waitcnt vmcnt(0) 3673; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27 3674; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23 3675; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19 3676; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15 3677; 
CHECK-NEXT: s_setpc_b64 s[30:31] 3678entry: 3679 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 31, i1 false) 3680 ret void 3681} 3682 3683define void @memcpy_p5_p1_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(1) align 8 readonly %src) { 3684; CHECK-LABEL: memcpy_p5_p1_sz32_align_8_8: 3685; CHECK: ; %bb.0: ; %entry 3686; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3687; CHECK-NEXT: s_clause 0x1 3688; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 3689; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 3690; CHECK-NEXT: s_waitcnt vmcnt(1) 3691; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 3692; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 3693; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 3694; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 3695; CHECK-NEXT: s_waitcnt vmcnt(0) 3696; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 3697; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 3698; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 3699; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 3700; CHECK-NEXT: s_setpc_b64 s[30:31] 3701entry: 3702 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(1) noundef nonnull align 8 %src, i64 32, i1 false) 3703 ret void 3704} 3705 3706define void @memcpy_p5_p1_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 3707; CHECK-LABEL: memcpy_p5_p1_sz16_align_16_16: 3708; CHECK: ; %bb.0: ; %entry 3709; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3710; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 3711; CHECK-NEXT: s_waitcnt vmcnt(0) 3712; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3713; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 
3714; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3715; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3716; CHECK-NEXT: s_setpc_b64 s[30:31] 3717entry: 3718 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 16, i1 false) 3719 ret void 3720} 3721 3722define void @memcpy_p5_p1_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 3723; CHECK-LABEL: memcpy_p5_p1_sz31_align_16_16: 3724; CHECK: ; %bb.0: ; %entry 3725; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3726; CHECK-NEXT: s_clause 0x1 3727; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 3728; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 3729; CHECK-NEXT: s_waitcnt vmcnt(1) 3730; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 3731; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 3732; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 3733; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 3734; CHECK-NEXT: s_waitcnt vmcnt(0) 3735; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27 3736; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23 3737; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19 3738; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15 3739; CHECK-NEXT: s_setpc_b64 s[30:31] 3740entry: 3741 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 31, i1 false) 3742 ret void 3743} 3744 3745define void @memcpy_p5_p1_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(1) align 16 readonly %src) { 3746; CHECK-LABEL: memcpy_p5_p1_sz32_align_16_16: 3747; CHECK: ; %bb.0: ; %entry 3748; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3749; CHECK-NEXT: s_clause 0x1 3750; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 
3751; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 3752; CHECK-NEXT: s_waitcnt vmcnt(1) 3753; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 3754; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 3755; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 3756; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 3757; CHECK-NEXT: s_waitcnt vmcnt(0) 3758; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 3759; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 3760; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 3761; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 3762; CHECK-NEXT: s_setpc_b64 s[30:31] 3763entry: 3764 tail call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(1) noundef nonnull align 16 %src, i64 32, i1 false) 3765 ret void 3766} 3767 3768define void @memcpy_p5_p3_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 3769; CHECK-LABEL: memcpy_p5_p3_sz16_align_1_1: 3770; CHECK: ; %bb.0: ; %entry 3771; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3772; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 3773; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3774; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3775; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3776; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3777; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3778; CHECK-NEXT: s_setpc_b64 s[30:31] 3779entry: 3780 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 16, i1 false) 3781 ret void 3782} 3783 3784define void @memcpy_p5_p3_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 3785; CHECK-LABEL: memcpy_p5_p3_sz31_align_1_1: 3786; CHECK: ; %bb.0: ; %entry 3787; CHECK-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3788; CHECK-NEXT: ds_read_b32 v8, v1 offset:24 3789; CHECK-NEXT: ds_read_u16 v9, v1 offset:28 3790; CHECK-NEXT: ds_read_u8 v10, v1 offset:30 3791; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 3792; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16 3793; CHECK-NEXT: s_waitcnt lgkmcnt(4) 3794; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 3795; CHECK-NEXT: s_waitcnt lgkmcnt(3) 3796; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28 3797; CHECK-NEXT: s_waitcnt lgkmcnt(2) 3798; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30 3799; CHECK-NEXT: s_waitcnt lgkmcnt(1) 3800; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 3801; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen 3802; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 3803; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 3804; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3805; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 3806; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 3807; CHECK-NEXT: s_setpc_b64 s[30:31] 3808entry: 3809 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 31, i1 false) 3810 ret void 3811} 3812 3813define void @memcpy_p5_p3_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(3) align 1 readonly %src) { 3814; CHECK-LABEL: memcpy_p5_p3_sz32_align_1_1: 3815; CHECK: ; %bb.0: ; %entry 3816; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3817; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 3818; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3 3819; CHECK-NEXT: s_waitcnt lgkmcnt(1) 3820; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 3821; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen 3822; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 3823; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 
0 offen offset:8 3824; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3825; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 3826; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 3827; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28 3828; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 3829; CHECK-NEXT: s_setpc_b64 s[30:31] 3830entry: 3831 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(3) noundef nonnull align 1 %src, i64 32, i1 false) 3832 ret void 3833} 3834 3835define void @memcpy_p5_p3_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { 3836; CHECK-LABEL: memcpy_p5_p3_sz16_align_2_2: 3837; CHECK: ; %bb.0: ; %entry 3838; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3839; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 3840; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3841; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3842; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3843; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3844; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3845; CHECK-NEXT: s_setpc_b64 s[30:31] 3846entry: 3847 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 16, i1 false) 3848 ret void 3849} 3850 3851define void @memcpy_p5_p3_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { 3852; CHECK-LABEL: memcpy_p5_p3_sz31_align_2_2: 3853; CHECK: ; %bb.0: ; %entry 3854; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3855; CHECK-NEXT: ds_read_b32 v8, v1 offset:24 3856; CHECK-NEXT: ds_read_u16 v9, v1 offset:28 3857; CHECK-NEXT: ds_read_u8 v10, v1 offset:30 3858; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 3859; CHECK-NEXT: ds_read_b64 v[6:7], v1 offset:16 3860; CHECK-NEXT: s_waitcnt lgkmcnt(4) 3861; CHECK-NEXT: buffer_store_dword v8, 
v0, s[0:3], 0 offen offset:24 3862; CHECK-NEXT: s_waitcnt lgkmcnt(3) 3863; CHECK-NEXT: buffer_store_short v9, v0, s[0:3], 0 offen offset:28 3864; CHECK-NEXT: s_waitcnt lgkmcnt(2) 3865; CHECK-NEXT: buffer_store_byte v10, v0, s[0:3], 0 offen offset:30 3866; CHECK-NEXT: s_waitcnt lgkmcnt(1) 3867; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 3868; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen 3869; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 3870; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 3871; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3872; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 3873; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 3874; CHECK-NEXT: s_setpc_b64 s[30:31] 3875entry: 3876 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 31, i1 false) 3877 ret void 3878} 3879 3880define void @memcpy_p5_p3_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(3) align 2 readonly %src) { 3881; CHECK-LABEL: memcpy_p5_p3_sz32_align_2_2: 3882; CHECK: ; %bb.0: ; %entry 3883; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3884; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 3885; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3 3886; CHECK-NEXT: s_waitcnt lgkmcnt(1) 3887; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 3888; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen 3889; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 3890; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 3891; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3892; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 3893; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 3894; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28 3895; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 3896; CHECK-NEXT: 
s_setpc_b64 s[30:31] 3897entry: 3898 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(3) noundef nonnull align 2 %src, i64 32, i1 false) 3899 ret void 3900} 3901 3902define void @memcpy_p5_p3_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 3903; CHECK-LABEL: memcpy_p5_p3_sz16_align_8_8: 3904; CHECK: ; %bb.0: ; %entry 3905; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3906; CHECK-NEXT: ds_read2_b64 v[1:4], v1 offset1:1 3907; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3908; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3909; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 3910; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3911; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3912; CHECK-NEXT: s_setpc_b64 s[30:31] 3913entry: 3914 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 16, i1 false) 3915 ret void 3916} 3917 3918define void @memcpy_p5_p3_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 3919; CHECK-LABEL: memcpy_p5_p3_sz31_align_8_8: 3920; CHECK: ; %bb.0: ; %entry 3921; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3922; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 3923; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15 3924; CHECK-NEXT: s_waitcnt lgkmcnt(1) 3925; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 3926; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 3927; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 3928; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen 3929; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3930; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27 3931; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23 3932; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:19 3933; CHECK-NEXT: 
buffer_store_dword v6, v0, s[0:3], 0 offen offset:15 3934; CHECK-NEXT: s_setpc_b64 s[30:31] 3935entry: 3936 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 31, i1 false) 3937 ret void 3938} 3939 3940define void @memcpy_p5_p3_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(3) align 8 readonly %src) { 3941; CHECK-LABEL: memcpy_p5_p3_sz32_align_8_8: 3942; CHECK: ; %bb.0: ; %entry 3943; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3944; CHECK-NEXT: ds_read2_b64 v[2:5], v1 offset1:1 3945; CHECK-NEXT: ds_read2_b64 v[6:9], v1 offset0:2 offset1:3 3946; CHECK-NEXT: s_waitcnt lgkmcnt(1) 3947; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 3948; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 3949; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 3950; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen 3951; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3952; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28 3953; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 3954; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 3955; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 3956; CHECK-NEXT: s_setpc_b64 s[30:31] 3957entry: 3958 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(3) noundef nonnull align 8 %src, i64 32, i1 false) 3959 ret void 3960} 3961 3962define void @memcpy_p5_p3_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { 3963; CHECK-LABEL: memcpy_p5_p3_sz16_align_16_16: 3964; CHECK: ; %bb.0: ; %entry 3965; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3966; CHECK-NEXT: ds_read_b128 v[1:4], v1 3967; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3968; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 3969; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 
3970; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 3971; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 3972; CHECK-NEXT: s_setpc_b64 s[30:31] 3973entry: 3974 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 16, i1 false) 3975 ret void 3976} 3977 3978define void @memcpy_p5_p3_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { 3979; CHECK-LABEL: memcpy_p5_p3_sz31_align_16_16: 3980; CHECK: ; %bb.0: ; %entry 3981; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3982; CHECK-NEXT: ds_read_b128 v[2:5], v1 3983; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:15 3984; CHECK-NEXT: s_waitcnt lgkmcnt(1) 3985; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 3986; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 3987; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 3988; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen 3989; CHECK-NEXT: s_waitcnt lgkmcnt(0) 3990; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:27 3991; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:23 3992; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:19 3993; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:15 3994; CHECK-NEXT: s_setpc_b64 s[30:31] 3995entry: 3996 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 31, i1 false) 3997 ret void 3998} 3999 4000define void @memcpy_p5_p3_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(3) align 16 readonly %src) { 4001; CHECK-LABEL: memcpy_p5_p3_sz32_align_16_16: 4002; CHECK: ; %bb.0: ; %entry 4003; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4004; CHECK-NEXT: ds_read_b128 v[2:5], v1 4005; CHECK-NEXT: ds_read_b128 v[6:9], v1 offset:16 4006; CHECK-NEXT: s_waitcnt lgkmcnt(1) 4007; CHECK-NEXT: 
buffer_store_dword v5, v0, s[0:3], 0 offen offset:12 4008; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:8 4009; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:4 4010; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen 4011; CHECK-NEXT: s_waitcnt lgkmcnt(0) 4012; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:28 4013; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:24 4014; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:20 4015; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:16 4016; CHECK-NEXT: s_setpc_b64 s[30:31] 4017entry: 4018 tail call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(3) noundef nonnull align 16 %src, i64 32, i1 false) 4019 ret void 4020} 4021 4022define void @memcpy_p5_p4_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 4023; CHECK-LABEL: memcpy_p5_p4_sz16_align_1_1: 4024; CHECK: ; %bb.0: ; %entry 4025; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4026; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 4027; CHECK-NEXT: s_waitcnt vmcnt(0) 4028; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 4029; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 4030; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 4031; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 4032; CHECK-NEXT: s_setpc_b64 s[30:31] 4033entry: 4034 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 16, i1 false) 4035 ret void 4036} 4037 4038define void @memcpy_p5_p4_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 4039; CHECK-LABEL: memcpy_p5_p4_sz31_align_1_1: 4040; CHECK: ; %bb.0: ; %entry 4041; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4042; CHECK-NEXT: s_clause 0x3 4043; CHECK-NEXT: global_load_dwordx3 v[5:7], 
v[1:2], off offset:16 4044; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28 4045; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 4046; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 4047; CHECK-NEXT: s_waitcnt vmcnt(3) 4048; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 4049; CHECK-NEXT: s_waitcnt vmcnt(2) 4050; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28 4051; CHECK-NEXT: s_waitcnt vmcnt(1) 4052; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 4053; CHECK-NEXT: s_waitcnt vmcnt(0) 4054; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 4055; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 4056; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 4057; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 4058; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 4059; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 4060; CHECK-NEXT: s_setpc_b64 s[30:31] 4061entry: 4062 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 31, i1 false) 4063 ret void 4064} 4065 4066define void @memcpy_p5_p4_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(4) align 1 readonly %src) { 4067; CHECK-LABEL: memcpy_p5_p4_sz32_align_1_1: 4068; CHECK: ; %bb.0: ; %entry 4069; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4070; CHECK-NEXT: s_clause 0x1 4071; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 4072; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 4073; CHECK-NEXT: s_waitcnt vmcnt(1) 4074; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 4075; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 4076; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 4077; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 4078; CHECK-NEXT: s_waitcnt vmcnt(0) 4079; CHECK-NEXT: 
buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 4080; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 4081; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 4082; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 4083; CHECK-NEXT: s_setpc_b64 s[30:31] 4084entry: 4085 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 32, i1 false) 4086 ret void 4087} 4088 4089define void @memcpy_p5_p4_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 4090; CHECK-LABEL: memcpy_p5_p4_sz16_align_2_2: 4091; CHECK: ; %bb.0: ; %entry 4092; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4093; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 4094; CHECK-NEXT: s_waitcnt vmcnt(0) 4095; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 4096; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 4097; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 4098; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 4099; CHECK-NEXT: s_setpc_b64 s[30:31] 4100entry: 4101 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 16, i1 false) 4102 ret void 4103} 4104 4105define void @memcpy_p5_p4_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 4106; CHECK-LABEL: memcpy_p5_p4_sz31_align_2_2: 4107; CHECK: ; %bb.0: ; %entry 4108; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4109; CHECK-NEXT: s_clause 0x3 4110; CHECK-NEXT: global_load_dwordx3 v[5:7], v[1:2], off offset:16 4111; CHECK-NEXT: global_load_ushort v8, v[1:2], off offset:28 4112; CHECK-NEXT: global_load_ubyte v9, v[1:2], off offset:30 4113; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 4114; CHECK-NEXT: s_waitcnt vmcnt(3) 4115; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:24 4116; 
CHECK-NEXT: s_waitcnt vmcnt(2) 4117; CHECK-NEXT: buffer_store_short v8, v0, s[0:3], 0 offen offset:28 4118; CHECK-NEXT: s_waitcnt vmcnt(1) 4119; CHECK-NEXT: buffer_store_byte v9, v0, s[0:3], 0 offen offset:30 4120; CHECK-NEXT: s_waitcnt vmcnt(0) 4121; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 4122; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 4123; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 4124; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 4125; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:20 4126; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:16 4127; CHECK-NEXT: s_setpc_b64 s[30:31] 4128entry: 4129 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 31, i1 false) 4130 ret void 4131} 4132 4133define void @memcpy_p5_p4_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(4) align 2 readonly %src) { 4134; CHECK-LABEL: memcpy_p5_p4_sz32_align_2_2: 4135; CHECK: ; %bb.0: ; %entry 4136; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4137; CHECK-NEXT: s_clause 0x1 4138; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 4139; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 4140; CHECK-NEXT: s_waitcnt vmcnt(1) 4141; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 4142; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 4143; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 4144; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 4145; CHECK-NEXT: s_waitcnt vmcnt(0) 4146; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 4147; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 4148; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 4149; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 4150; CHECK-NEXT: s_setpc_b64 s[30:31] 4151entry: 4152 tail call 
void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(4) noundef nonnull align 2 %src, i64 32, i1 false) 4153 ret void 4154} 4155 4156define void @memcpy_p5_p4_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { 4157; CHECK-LABEL: memcpy_p5_p4_sz16_align_8_8: 4158; CHECK: ; %bb.0: ; %entry 4159; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4160; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 4161; CHECK-NEXT: s_waitcnt vmcnt(0) 4162; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 4163; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 4164; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 4165; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 4166; CHECK-NEXT: s_setpc_b64 s[30:31] 4167entry: 4168 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 16, i1 false) 4169 ret void 4170} 4171 4172define void @memcpy_p5_p4_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { 4173; CHECK-LABEL: memcpy_p5_p4_sz31_align_8_8: 4174; CHECK: ; %bb.0: ; %entry 4175; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4176; CHECK-NEXT: s_clause 0x1 4177; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 4178; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 4179; CHECK-NEXT: s_waitcnt vmcnt(1) 4180; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 4181; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 4182; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 4183; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 4184; CHECK-NEXT: s_waitcnt vmcnt(0) 4185; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27 4186; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23 4187; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19 4188; 
CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15 4189; CHECK-NEXT: s_setpc_b64 s[30:31] 4190entry: 4191 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 31, i1 false) 4192 ret void 4193} 4194 4195define void @memcpy_p5_p4_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(4) align 8 readonly %src) { 4196; CHECK-LABEL: memcpy_p5_p4_sz32_align_8_8: 4197; CHECK: ; %bb.0: ; %entry 4198; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4199; CHECK-NEXT: s_clause 0x1 4200; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 4201; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 4202; CHECK-NEXT: s_waitcnt vmcnt(1) 4203; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 4204; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 4205; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 4206; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 4207; CHECK-NEXT: s_waitcnt vmcnt(0) 4208; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 4209; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 4210; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 4211; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 4212; CHECK-NEXT: s_setpc_b64 s[30:31] 4213entry: 4214 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(4) noundef nonnull align 8 %src, i64 32, i1 false) 4215 ret void 4216} 4217 4218define void @memcpy_p5_p4_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 4219; CHECK-LABEL: memcpy_p5_p4_sz16_align_16_16: 4220; CHECK: ; %bb.0: ; %entry 4221; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4222; CHECK-NEXT: global_load_dwordx4 v[1:4], v[1:2], off 4223; CHECK-NEXT: s_waitcnt vmcnt(0) 4224; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:12 
4225; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:8 4226; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 4227; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen 4228; CHECK-NEXT: s_setpc_b64 s[30:31] 4229entry: 4230 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 16, i1 false) 4231 ret void 4232} 4233 4234define void @memcpy_p5_p4_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 4235; CHECK-LABEL: memcpy_p5_p4_sz31_align_16_16: 4236; CHECK: ; %bb.0: ; %entry 4237; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4238; CHECK-NEXT: s_clause 0x1 4239; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 4240; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:15 4241; CHECK-NEXT: s_waitcnt vmcnt(1) 4242; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 4243; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 4244; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 4245; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 4246; CHECK-NEXT: s_waitcnt vmcnt(0) 4247; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:27 4248; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:23 4249; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:19 4250; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:15 4251; CHECK-NEXT: s_setpc_b64 s[30:31] 4252entry: 4253 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 31, i1 false) 4254 ret void 4255} 4256 4257define void @memcpy_p5_p4_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(4) align 16 readonly %src) { 4258; CHECK-LABEL: memcpy_p5_p4_sz32_align_16_16: 4259; CHECK: ; %bb.0: ; %entry 4260; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4261; CHECK-NEXT: 
s_clause 0x1 4262; CHECK-NEXT: global_load_dwordx4 v[3:6], v[1:2], off 4263; CHECK-NEXT: global_load_dwordx4 v[7:10], v[1:2], off offset:16 4264; CHECK-NEXT: s_waitcnt vmcnt(1) 4265; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:12 4266; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:8 4267; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 4268; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen 4269; CHECK-NEXT: s_waitcnt vmcnt(0) 4270; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:28 4271; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:24 4272; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:20 4273; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:16 4274; CHECK-NEXT: s_setpc_b64 s[30:31] 4275entry: 4276 tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(4) noundef nonnull align 16 %src, i64 32, i1 false) 4277 ret void 4278} 4279 4280define void @memcpy_p5_p5_sz16_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 4281; CHECK-LABEL: memcpy_p5_p5_sz16_align_1_1: 4282; CHECK: ; %bb.0: ; %entry 4283; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4284; CHECK-NEXT: s_clause 0x3 4285; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:8 4286; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:12 4287; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen 4288; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 4289; CHECK-NEXT: s_waitcnt vmcnt(3) 4290; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:8 4291; CHECK-NEXT: s_waitcnt vmcnt(2) 4292; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:12 4293; CHECK-NEXT: s_waitcnt vmcnt(1) 4294; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen 4295; CHECK-NEXT: s_waitcnt vmcnt(0) 4296; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 4297; CHECK-NEXT: 
s_setpc_b64 s[30:31] 4298entry: 4299 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 16, i1 false) 4300 ret void 4301} 4302 4303define void @memcpy_p5_p5_sz31_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 4304; CHECK-LABEL: memcpy_p5_p5_sz31_align_1_1: 4305; CHECK: ; %bb.0: ; %entry 4306; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4307; CHECK-NEXT: s_clause 0x8 4308; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28 4309; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24 4310; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16 4311; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20 4312; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8 4313; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12 4314; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen 4315; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4 4316; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30 4317; CHECK-NEXT: s_waitcnt vmcnt(8) 4318; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28 4319; CHECK-NEXT: s_waitcnt vmcnt(7) 4320; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24 4321; CHECK-NEXT: s_waitcnt vmcnt(6) 4322; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 4323; CHECK-NEXT: s_waitcnt vmcnt(5) 4324; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 4325; CHECK-NEXT: s_waitcnt vmcnt(4) 4326; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 4327; CHECK-NEXT: s_waitcnt vmcnt(3) 4328; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 4329; CHECK-NEXT: s_waitcnt vmcnt(2) 4330; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen 4331; CHECK-NEXT: s_waitcnt vmcnt(1) 4332; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4 4333; CHECK-NEXT: s_waitcnt 
vmcnt(0) 4334; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30 4335; CHECK-NEXT: s_setpc_b64 s[30:31] 4336entry: 4337 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 31, i1 false) 4338 ret void 4339} 4340 4341define void @memcpy_p5_p5_sz32_align_1_1(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) { 4342; CHECK-LABEL: memcpy_p5_p5_sz32_align_1_1: 4343; CHECK: ; %bb.0: ; %entry 4344; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4345; CHECK-NEXT: s_clause 0x7 4346; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:24 4347; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:28 4348; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16 4349; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20 4350; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8 4351; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12 4352; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen 4353; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 4354; CHECK-NEXT: s_waitcnt vmcnt(7) 4355; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:24 4356; CHECK-NEXT: s_waitcnt vmcnt(6) 4357; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:28 4358; CHECK-NEXT: s_waitcnt vmcnt(5) 4359; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 4360; CHECK-NEXT: s_waitcnt vmcnt(4) 4361; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 4362; CHECK-NEXT: s_waitcnt vmcnt(3) 4363; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 4364; CHECK-NEXT: s_waitcnt vmcnt(2) 4365; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 4366; CHECK-NEXT: s_waitcnt vmcnt(1) 4367; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen 4368; CHECK-NEXT: s_waitcnt vmcnt(0) 4369; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 
4370; CHECK-NEXT: s_setpc_b64 s[30:31] 4371entry: 4372 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 32, i1 false) 4373 ret void 4374} 4375 4376define void @memcpy_p5_p5_sz16_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 4377; CHECK-LABEL: memcpy_p5_p5_sz16_align_2_2: 4378; CHECK: ; %bb.0: ; %entry 4379; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4380; CHECK-NEXT: s_clause 0x3 4381; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:8 4382; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:12 4383; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen 4384; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 4385; CHECK-NEXT: s_waitcnt vmcnt(3) 4386; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:8 4387; CHECK-NEXT: s_waitcnt vmcnt(2) 4388; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:12 4389; CHECK-NEXT: s_waitcnt vmcnt(1) 4390; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen 4391; CHECK-NEXT: s_waitcnt vmcnt(0) 4392; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 4393; CHECK-NEXT: s_setpc_b64 s[30:31] 4394entry: 4395 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 16, i1 false) 4396 ret void 4397} 4398 4399define void @memcpy_p5_p5_sz31_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 4400; CHECK-LABEL: memcpy_p5_p5_sz31_align_2_2: 4401; CHECK: ; %bb.0: ; %entry 4402; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4403; CHECK-NEXT: s_clause 0x8 4404; CHECK-NEXT: buffer_load_ushort v2, v1, s[0:3], 0 offen offset:28 4405; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:24 4406; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:16 4407; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen 
offset:20 4408; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8 4409; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12 4410; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen 4411; CHECK-NEXT: buffer_load_dword v9, v1, s[0:3], 0 offen offset:4 4412; CHECK-NEXT: buffer_load_ubyte v1, v1, s[0:3], 0 offen offset:30 4413; CHECK-NEXT: s_waitcnt vmcnt(8) 4414; CHECK-NEXT: buffer_store_short v2, v0, s[0:3], 0 offen offset:28 4415; CHECK-NEXT: s_waitcnt vmcnt(7) 4416; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:24 4417; CHECK-NEXT: s_waitcnt vmcnt(6) 4418; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 4419; CHECK-NEXT: s_waitcnt vmcnt(5) 4420; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 4421; CHECK-NEXT: s_waitcnt vmcnt(4) 4422; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 4423; CHECK-NEXT: s_waitcnt vmcnt(3) 4424; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 4425; CHECK-NEXT: s_waitcnt vmcnt(2) 4426; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen 4427; CHECK-NEXT: s_waitcnt vmcnt(1) 4428; CHECK-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:4 4429; CHECK-NEXT: s_waitcnt vmcnt(0) 4430; CHECK-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen offset:30 4431; CHECK-NEXT: s_setpc_b64 s[30:31] 4432entry: 4433 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 31, i1 false) 4434 ret void 4435} 4436 4437define void @memcpy_p5_p5_sz32_align_2_2(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 readonly %src) { 4438; CHECK-LABEL: memcpy_p5_p5_sz32_align_2_2: 4439; CHECK: ; %bb.0: ; %entry 4440; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4441; CHECK-NEXT: s_clause 0x7 4442; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen offset:24 4443; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:28 4444; CHECK-NEXT: buffer_load_dword 
v4, v1, s[0:3], 0 offen offset:16 4445; CHECK-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:20 4446; CHECK-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:8 4447; CHECK-NEXT: buffer_load_dword v7, v1, s[0:3], 0 offen offset:12 4448; CHECK-NEXT: buffer_load_dword v8, v1, s[0:3], 0 offen 4449; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 4450; CHECK-NEXT: s_waitcnt vmcnt(7) 4451; CHECK-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:24 4452; CHECK-NEXT: s_waitcnt vmcnt(6) 4453; CHECK-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:28 4454; CHECK-NEXT: s_waitcnt vmcnt(5) 4455; CHECK-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:16 4456; CHECK-NEXT: s_waitcnt vmcnt(4) 4457; CHECK-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:20 4458; CHECK-NEXT: s_waitcnt vmcnt(3) 4459; CHECK-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:8 4460; CHECK-NEXT: s_waitcnt vmcnt(2) 4461; CHECK-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:12 4462; CHECK-NEXT: s_waitcnt vmcnt(1) 4463; CHECK-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen 4464; CHECK-NEXT: s_waitcnt vmcnt(0) 4465; CHECK-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4 4466; CHECK-NEXT: s_setpc_b64 s[30:31] 4467entry: 4468 tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 2 %dst, ptr addrspace(5) noundef nonnull align 2 %src, i64 32, i1 false) 4469 ret void 4470} 4471 4472define void @memcpy_p5_p5_sz16_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) { 4473; CHECK-LABEL: memcpy_p5_p5_sz16_align_8_8: 4474; CHECK: ; %bb.0: ; %entry 4475; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4476; CHECK-NEXT: s_clause 0x3 4477; CHECK-NEXT: buffer_load_dword v2, v1, s[0:3], 0 offen 4478; CHECK-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen offset:4 4479; CHECK-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:8 4480; CHECK-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen 
offset:12
; CHECK-NEXT:    s_waitcnt vmcnt(3)
; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT:    s_waitcnt vmcnt(2)
; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT:    s_waitcnt vmcnt(1)
; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 16, i1 false)
  ret void
}

; 31-byte copy between addrspace(5) pointers, both align 8.
; The expected code moves bytes 15..30 and bytes 0..15 as two groups of four
; dword accesses, so the byte at offset 15 is transferred twice.
; CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
define void @memcpy_p5_p5_sz31_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz31_align_8_8:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_clause 0x7
; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen
; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT:    s_waitcnt vmcnt(7)
; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT:    s_waitcnt vmcnt(6)
; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT:    s_waitcnt vmcnt(5)
; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT:    s_waitcnt vmcnt(4)
; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT:    s_waitcnt vmcnt(3)
; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen
; CHECK-NEXT:    s_waitcnt vmcnt(2)
; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT:    s_waitcnt vmcnt(1)
; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 31, i1 false)
  ret void
}

; 32-byte copy between addrspace(5) pointers, both align 8.
; Expected as eight dword loads in one clause followed by eight dword stores,
; with the upper 16 bytes (offsets 16..28) issued before the lower 16 bytes.
define void @memcpy_p5_p5_sz32_align_8_8(ptr addrspace(5) align 8 %dst, ptr addrspace(5) align 8 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz32_align_8_8:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_clause 0x7
; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen
; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT:    s_waitcnt vmcnt(7)
; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT:    s_waitcnt vmcnt(6)
; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT:    s_waitcnt vmcnt(5)
; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT:    s_waitcnt vmcnt(4)
; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT:    s_waitcnt vmcnt(3)
; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen
; CHECK-NEXT:    s_waitcnt vmcnt(2)
; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT:    s_waitcnt vmcnt(1)
; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 8 %dst, ptr addrspace(5) noundef nonnull align 8 %src, i64 32, i1 false)
  ret void
}

; 16-byte copy between addrspace(5) pointers, both align 16.
; Expected as four dword loads in one clause followed by four dword stores.
define void @memcpy_p5_p5_sz16_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz16_align_16_16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_clause 0x3
; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen
; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT:    s_waitcnt vmcnt(3)
; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
; CHECK-NEXT:    s_waitcnt vmcnt(2)
; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT:    s_waitcnt vmcnt(1)
; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 16, i1 false)
  ret void
}

; 31-byte copy between addrspace(5) pointers, both align 16.
; Same expected shape as the align-8 variant: bytes 15..30 and bytes 0..15 are
; each moved with four dword accesses, overlapping at offset 15.
define void @memcpy_p5_p5_sz31_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz31_align_16_16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_clause 0x7
; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:15
; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:19
; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:23
; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:27
; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen
; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT:    s_waitcnt vmcnt(7)
; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:15
; CHECK-NEXT:    s_waitcnt vmcnt(6)
; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:19
; CHECK-NEXT:    s_waitcnt vmcnt(5)
; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:23
; CHECK-NEXT:    s_waitcnt vmcnt(4)
; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:27
; CHECK-NEXT:    s_waitcnt vmcnt(3)
; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen
; CHECK-NEXT:    s_waitcnt vmcnt(2)
; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT:    s_waitcnt vmcnt(1)
; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 31, i1 false)
  ret void
}

; 32-byte copy between addrspace(5) pointers, both align 16.
; Expected as eight dword loads in one clause followed by eight dword stores,
; with the upper 16 bytes (offsets 16..28) issued before the lower 16 bytes.
define void @memcpy_p5_p5_sz32_align_16_16(ptr addrspace(5) align 16 %dst, ptr addrspace(5) align 16 readonly %src) {
; CHECK-LABEL: memcpy_p5_p5_sz32_align_16_16:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_clause 0x7
; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:16
; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:20
; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:24
; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:28
; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen
; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:4
; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:8
; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:12
; CHECK-NEXT:    s_waitcnt vmcnt(7)
; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:16
; CHECK-NEXT:    s_waitcnt vmcnt(6)
; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:20
; CHECK-NEXT:    s_waitcnt vmcnt(5)
; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:24
; CHECK-NEXT:    s_waitcnt vmcnt(4)
; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:28
; CHECK-NEXT:    s_waitcnt vmcnt(3)
; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen
; CHECK-NEXT:    s_waitcnt vmcnt(2)
; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:4
; CHECK-NEXT:    s_waitcnt vmcnt(1)
; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:8
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 16 %dst, ptr addrspace(5) noundef nonnull align 16 %src, i64 32, i1 false)
  ret void
}

; Declarations of the memcpy intrinsic for every destination/source
; address-space pair used in this file (dst: 0, 1, 3, 5; src: 0, 1, 3, 4, 5).
; They reference attribute group #0, the only group this file defines; the
; previous "#2" reference named a group that is not defined here.
declare void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p0.p1.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p0.p3.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p3.p0.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p3.p5.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #0

; Attribute group shared by all memcpy intrinsic declarations above.
attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }