; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Code generation tests for the llvm.ptrmask intrinsic on pointers in address
; spaces 1, 3, 7 and 8. Functions prefixed with v_ use the default calling
; convention and are expected to produce v_and instructions; functions prefixed
; with s_ are amdgpu_ps functions taking inreg arguments and are expected to
; produce s_and instructions.

; Address space 1 pointer with a variable i64 mask: both 32-bit halves are
; expected to be and-ed with the corresponding half of the mask.
define ptr addrspace(1) @v_ptrmask_global_variable_i64(ptr addrspace(1) %ptr, i64 %mask) {
; GCN-LABEL: v_ptrmask_global_variable_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v1, v1, v3
; GCN-NEXT:    v_and_b32_e32 v0, v0, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_global_variable_i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v2
; GFX10PLUS-NEXT:    v_and_b32_e32 v1, v1, v3
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %masked = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) %ptr, i64 %mask)
  ret ptr addrspace(1) %masked
}

; Address space 3 pointer with a variable i32 mask: a single 32-bit and.
define ptr addrspace(3) @v_ptrmask_local_variable_i32(ptr addrspace(3) %ptr, i32 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_local_variable_i32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask)
  ret ptr addrspace(3) %masked
}

; amdgpu_ps / inreg variant of the address space 1 test: one 64-bit s_and.
define amdgpu_ps ptr addrspace(1) @s_ptrmask_global_variable_i64(ptr addrspace(1) inreg %ptr, i64 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_global_variable_i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_and_b64 s[0:1], s[2:3], s[4:5]
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) %ptr, i64 %mask)
  ret ptr addrspace(1) %masked
}

; amdgpu_ps / inreg variant of the address space 3 test: one 32-bit s_and.
define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i32(ptr addrspace(3) inreg %ptr, i32 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s2, s3
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_local_variable_i32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_and_b32 s0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask)
  ret ptr addrspace(3) %masked
}

; Address space 7 pointer with a variable i32 mask: only one register (v4) of
; the pointer is expected to be and-ed with the mask.
define ptr addrspace(7) @v_ptrmask_buffer_fat_ptr_variable_i32(ptr addrspace(7) %ptr, i32 %mask) {
; GCN-LABEL: v_ptrmask_buffer_fat_ptr_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v4, v4, v5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_buffer_fat_ptr_variable_i32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_and_b32_e32 v4, v4, v5
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %masked = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) %ptr, i32 %mask)
  ret ptr addrspace(7) %masked
}

; Address space 7 pointer with the constant mask -8: the constant is expected
; to be used directly as an operand of the and on v4.
define ptr addrspace(7) @v_ptrmask_buffer_fat_ptr_i32_neg8(ptr addrspace(7) %ptr) {
; GCN-LABEL: v_ptrmask_buffer_fat_ptr_i32_neg8:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v4, -8, v4
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_buffer_fat_ptr_i32_neg8:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_and_b32_e32 v4, -8, v4
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %masked = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) %ptr, i32 -8)
  ret ptr addrspace(7) %masked
}

; amdgpu_ps / inreg variant of the address space 7 variable-mask test: the
; incoming registers are moved into the return registers and a single s_and is
; applied.
define amdgpu_ps ptr addrspace(7) @s_ptrmask_buffer_fat_ptr_variable_i32(ptr addrspace(7) inreg %ptr, i32 inreg %mask) {
; GCN-LABEL: s_ptrmask_buffer_fat_ptr_variable_i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s8, s4
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_and_b32 s4, s6, s7
; GCN-NEXT:    s_mov_b32 s2, s8
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_buffer_fat_ptr_variable_i32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_mov_b32 s8, s4
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_and_b32 s4, s6, s7
; GFX10PLUS-NEXT:    s_mov_b32 s2, s8
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) %ptr, i32 %mask)
  ret ptr addrspace(7) %masked
}

; amdgpu_ps / inreg variant of the address space 7 constant -8 mask test.
define amdgpu_ps ptr addrspace(7) @s_ptrmask_buffer_fat_ptr_i32_neg8(ptr addrspace(7) inreg %ptr) {
; GCN-LABEL: s_ptrmask_buffer_fat_ptr_i32_neg8:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s7, s4
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    s_and_b32 s4, s6, -8
; GCN-NEXT:    s_mov_b32 s2, s7
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_buffer_fat_ptr_i32_neg8:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_mov_b32 s7, s4
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_and_b32 s4, s6, -8
; GFX10PLUS-NEXT:    s_mov_b32 s2, s7
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) %ptr, i32 -8)
  ret ptr addrspace(7) %masked
}

; Address space 8 pointer with a variable i128 mask: all four 32-bit registers
; are expected to be and-ed with the corresponding mask register.
define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) %ptr, i128 %mask) {
; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v1, v1, v5
; GCN-NEXT:    v_and_b32_e32 v0, v0, v4
; GCN-NEXT:    v_and_b32_e32 v3, v3, v7
; GCN-NEXT:    v_and_b32_e32 v2, v2, v6
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_and_b32_e32 v0, v0, v4
; GFX10PLUS-NEXT:    v_and_b32_e32 v1, v1, v5
; GFX10PLUS-NEXT:    v_and_b32_e32 v2, v2, v6
; GFX10PLUS-NEXT:    v_and_b32_e32 v3, v3, v7
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 %mask)
  ret ptr addrspace(8) %masked
}

; Address space 8 pointer with the constant i128 mask -8: only the lowest
; register (v0) is expected to need an and.
define ptr addrspace(8) @v_ptrmask_buffer_resource_variable_i128_neg8(ptr addrspace(8) %ptr) {
; GCN-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v0, -8, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ptrmask_buffer_resource_variable_i128_neg8:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_and_b32_e32 v0, -8, v0
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 -8)
  ret ptr addrspace(8) %masked
}

; amdgpu_ps / inreg variant of the address space 8 variable-mask test: two
; 64-bit s_and instructions.
define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128(ptr addrspace(8) inreg %ptr, i128 inreg %mask) {
; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[6:7]
; GCN-NEXT:    s_and_b64 s[2:3], s[4:5], s[8:9]
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_and_b64 s[0:1], s[2:3], s[6:7]
; GFX10PLUS-NEXT:    s_and_b64 s[2:3], s[4:5], s[8:9]
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 %mask)
  ret ptr addrspace(8) %masked
}

; amdgpu_ps / inreg variant of the address space 8 constant -8 mask test: one
; 32-bit s_and on the lowest register, the rest are plain moves.
define amdgpu_ps ptr addrspace(8) @s_ptrmask_buffer_resource_variable_i128_neg8(ptr addrspace(8) inreg %ptr) {
; GCN-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s1, s3
; GCN-NEXT:    s_and_b32 s0, s2, -8
; GCN-NEXT:    s_mov_b32 s2, s4
; GCN-NEXT:    s_mov_b32 s3, s5
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ptrmask_buffer_resource_variable_i128_neg8:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_and_b32 s0, s2, -8
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %masked = call ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8) %ptr, i128 -8)
  ret ptr addrspace(8) %masked
}

; Every ptrmask overload called above is declared explicitly so the file does
; not depend on the IR parser declaring intrinsics implicitly.
declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) #0
declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64) #0
declare ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7), i32) #0
declare ptr addrspace(8) @llvm.ptrmask.p8.i128(ptr addrspace(8), i128) #0

attributes #0 = { nounwind readnone speculatable willreturn }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}