; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Codegen checks for monotonic atomic loads through addrspace(3) pointers.
;
; Every function performs exactly one `load atomic ... monotonic` and returns
; the loaded value. For each one the checks expect, in order: an s_waitcnt, a
; single ds_read_* of the matching width, a wait on lgkmcnt(0), and the
; s_setpc_b64 return.
;
; The two llc invocations above differ only in -mcpu:
;   * kaveri (checked with the extra CI prefix) must emit an s_mov_b32 write
;     to m0 directly before the DS read;
;   * gfx900 (checked with the extra GFX9 prefix) must not emit such a write.
;
; The *_offset variants index 16 elements past %ptr, so the expected immediate
; offset on the DS instruction is 16 * sizeof(element) bytes.

; 8-bit load, no offset: expects ds_read_u8.
; GCN-LABEL: {{^}}atomic_load_monotonic_i8:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u8 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) {
  %load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
  ret i8 %load
}

; 8-bit load at element 16: 16 * 1 byte folds to offset:16.
; GCN-LABEL: {{^}}atomic_load_monotonic_i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @atomic_load_monotonic_i8_offset(ptr addrspace(3) %ptr) {
  %gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
  %load = load atomic i8, ptr addrspace(3) %gep monotonic, align 1
  ret i8 %load
}

; 16-bit load, no offset: expects ds_read_u16.
; GCN-LABEL: {{^}}atomic_load_monotonic_i16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u16 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_i16(ptr addrspace(3) %ptr) {
  %load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2
  ret i16 %load
}

; 16-bit load at element 16: 16 * 2 bytes folds to offset:32.
; GCN-LABEL: {{^}}atomic_load_monotonic_i16_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_i16_offset(ptr addrspace(3) %ptr) {
  %gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16
  %load = load atomic i16, ptr addrspace(3) %gep monotonic, align 2
  ret i16 %load
}

; 32-bit load, no offset: expects ds_read_b32.
; GCN-LABEL: {{^}}atomic_load_monotonic_i32:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @atomic_load_monotonic_i32(ptr addrspace(3) %ptr) {
  %load = load atomic i32, ptr addrspace(3) %ptr monotonic, align 4
  ret i32 %load
}

; 32-bit load at element 16: 16 * 4 bytes folds to offset:64.
; GCN-LABEL: {{^}}atomic_load_monotonic_i32_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @atomic_load_monotonic_i32_offset(ptr addrspace(3) %ptr) {
  %gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16
  %load = load atomic i32, ptr addrspace(3) %gep monotonic, align 4
  ret i32 %load
}

; 64-bit load, no offset: expects ds_read_b64 into the v[0:1] pair.
; GCN-LABEL: {{^}}atomic_load_monotonic_i64:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @atomic_load_monotonic_i64(ptr addrspace(3) %ptr) {
  %load = load atomic i64, ptr addrspace(3) %ptr monotonic, align 8
  ret i64 %load
}

; 64-bit load at element 16: 16 * 8 bytes folds to offset:128.
; GCN-LABEL: {{^}}atomic_load_monotonic_i64_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @atomic_load_monotonic_i64_offset(ptr addrspace(3) %ptr) {
  %gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i32 16
  %load = load atomic i64, ptr addrspace(3) %gep monotonic, align 8
  ret i64 %load
}

; float load at element 16: same expected instruction as the i32 offset case.
; GCN-LABEL: {{^}}atomic_load_monotonic_f32_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define float @atomic_load_monotonic_f32_offset(ptr addrspace(3) %ptr) {
  %gep = getelementptr inbounds float, ptr addrspace(3) %ptr, i32 16
  %load = load atomic float, ptr addrspace(3) %gep monotonic, align 4
  ret float %load
}

; double load at element 16: same expected instruction as the i64 offset case.
; GCN-LABEL: {{^}}atomic_load_monotonic_f64_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define double @atomic_load_monotonic_f64_offset(ptr addrspace(3) %ptr) {
  %gep = getelementptr inbounds double, ptr addrspace(3) %ptr, i32 16
  %load = load atomic double, ptr addrspace(3) %gep monotonic, align 8
  ret double %load
}

; Load of a default-address-space pointer at element 16: the checks expect a
; 64-bit read at offset:128.
; GCN-LABEL: {{^}}atomic_load_monotonic_p0i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define ptr @atomic_load_monotonic_p0i8_offset(ptr addrspace(3) %ptr) {
  %gep = getelementptr inbounds ptr, ptr addrspace(3) %ptr, i32 16
  %load = load atomic ptr, ptr addrspace(3) %gep monotonic, align 8
  ret ptr %load
}

; Load of an addrspace(3) pointer at element 16: the checks expect a 32-bit
; read at offset:64.
; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr) {
  %gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %ptr, i32 16
  %load = load atomic ptr addrspace(3), ptr addrspace(3) %gep monotonic, align 4
  ret ptr addrspace(3) %load
}

; half load, no offset. The loaded value is bitcast to i16 and returned as an
; integer; the checks expect the same ds_read_u16 as the i16 case.
; GCN-LABEL: {{^}}atomic_load_monotonic_f16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u16 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_f16(ptr addrspace(3) %ptr) {
  %load = load atomic half, ptr addrspace(3) %ptr monotonic, align 2
  %ret = bitcast half %load to i16
  ret i16 %ret
}

; half load at element 16 (16 * 2 bytes = offset:32), returned as i16 bits.
; GCN-LABEL: {{^}}atomic_load_monotonic_f16_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_f16_offset(ptr addrspace(3) %ptr) {
  %gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16
  %load = load atomic half, ptr addrspace(3) %gep monotonic, align 2
  %ret = bitcast half %load to i16
  ret i16 %ret
}

; bfloat load, no offset. The loaded value is bitcast to i16 and returned as an
; integer; the checks expect the same ds_read_u16 as the i16 case.
; GCN-LABEL: {{^}}atomic_load_monotonic_bf16:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u16 v0, v0{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_bf16(ptr addrspace(3) %ptr) {
  %load = load atomic bfloat, ptr addrspace(3) %ptr monotonic, align 2
  %ret = bitcast bfloat %load to i16
  ret i16 %ret
}

; bfloat load at element 16 (16 * 2 bytes = offset:32), returned as i16 bits.
; GCN-LABEL: {{^}}atomic_load_monotonic_bf16_offset:
; GCN: s_waitcnt
; GFX9-NOT: s_mov_b32 m0
; CI-NEXT: s_mov_b32 m0
; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}}
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) {
  %gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16
  %load = load atomic bfloat, ptr addrspace(3) %gep monotonic, align 2
  %ret = bitcast bfloat %load to i16
  ret i16 %ret
}