; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Monotonic atomic stores through addrspace(3) pointers.
;
; Every function below follows the same pattern: after the initial s_waitcnt,
; the kaveri run expects an "s_mov_b32 m0" immediately before the store, while
; the gfx900 run expects no such instruction between the s_waitcnt and the
; store. Both runs then expect a single ds_write of the matching width, a wait
; on lgkmcnt(0), and the return via s_setpc_b64.
;
; The "offset" variants store through a getelementptr of 16 elements and expect
; the resulting byte offset (16 * element size) to appear as the immediate
; offset operand of the ds_write. The trailing end-of-line anchor on each store
; pattern rejects any additional operands or modifiers.

; 8-bit integer store, no offset.
; GCN-LABEL: {{^}}atomic_store_monotonic_i8:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b8 v0, v1{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) {
  store atomic i8 %val, ptr addrspace(3) %ptr monotonic, align 1
  ret void
}

; 8-bit integer store, 16 elements * 1 byte = offset 16.
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i8:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i8(ptr addrspace(3) %ptr, i8 %val) {
  %gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
  store atomic i8 %val, ptr addrspace(3) %gep monotonic, align 1
  ret void
}

; 16-bit integer store, no offset.
; GCN-LABEL: {{^}}atomic_store_monotonic_i16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b16 v0, v1{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i16(ptr addrspace(3) %ptr, i16 %val) {
  store atomic i16 %val, ptr addrspace(3) %ptr monotonic, align 2
  ret void
}

; 16-bit integer store, 16 elements * 2 bytes = offset 32.
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i16(ptr addrspace(3) %ptr, i16 %val) {
  %gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16
  store atomic i16 %val, ptr addrspace(3) %gep monotonic, align 2
  ret void
}

; 32-bit integer store, no offset.
; GCN-LABEL: {{^}}atomic_store_monotonic_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b32 v0, v1{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i32(ptr addrspace(3) %ptr, i32 %val) {
  store atomic i32 %val, ptr addrspace(3) %ptr monotonic, align 4
  ret void
}

; 32-bit integer store, 16 elements * 4 bytes = offset 64.
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b32 v0, v1 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i32(ptr addrspace(3) %ptr, i32 %val) {
  %gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16
  store atomic i32 %val, ptr addrspace(3) %gep monotonic, align 4
  ret void
}

; 64-bit integer store, no offset; the value is expected in the v[1:2] pair.
; GCN-LABEL: {{^}}atomic_store_monotonic_i64:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b64 v0, v[1:2]{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_i64(ptr addrspace(3) %ptr, i64 %val) {
  store atomic i64 %val, ptr addrspace(3) %ptr monotonic, align 8
  ret void
}

; 64-bit integer store, 16 elements * 8 bytes = offset 128.
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_i64:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b64 v0, v[1:2] offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_i64(ptr addrspace(3) %ptr, i64 %val) {
  %gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i64 16
  store atomic i64 %val, ptr addrspace(3) %gep monotonic, align 8
  ret void
}

; half store, no offset. The value arrives as i16 and is bitcast to half, and
; the checks expect the same 16-bit ds_write as the i16 case.
; GCN-LABEL: {{^}}atomic_store_monotonic_f16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b16 v0, v1{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_f16(ptr addrspace(3) %ptr, i16 %arg.val) {
  %val = bitcast i16 %arg.val to half
  store atomic half %val, ptr addrspace(3) %ptr monotonic, align 2
  ret void
}

; half store, 16 elements * 2 bytes = offset 32.
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_f16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_f16(ptr addrspace(3) %ptr, i16 %arg.val) {
  %val = bitcast i16 %arg.val to half
  %gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16
  store atomic half %val, ptr addrspace(3) %gep monotonic, align 2
  ret void
}

; bfloat store, no offset. The value arrives as i16 and is bitcast to bfloat,
; and the checks expect the same 16-bit ds_write as the i16 case.
; GCN-LABEL: {{^}}atomic_store_monotonic_bf16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b16 v0, v1{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_bf16(ptr addrspace(3) %ptr, i16 %arg.val) {
  %val = bitcast i16 %arg.val to bfloat
  store atomic bfloat %val, ptr addrspace(3) %ptr monotonic, align 2
  ret void
}

; bfloat store, 16 elements * 2 bytes = offset 32.
; GCN-LABEL: {{^}}atomic_store_monotonic_offset_bf16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @atomic_store_monotonic_offset_bf16(ptr addrspace(3) %ptr, i16 %arg.val) {
  %val = bitcast i16 %arg.val to bfloat
  %gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16
  store atomic bfloat %val, ptr addrspace(3) %gep monotonic, align 2
  ret void
}