; RUN: llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI,SICIVI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SICIVI,GFX89 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9,GFX89 %s

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xchg ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_f64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr double, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, double 4.0 seq_cst
  store double %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_pointer_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_pointer_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr ptr, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, ptr null seq_cst
  store ptr %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 9 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw and ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw and ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw or ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw or ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xor ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xor ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_ret_i64:
; define amdgpu_kernel void @lds_atomic_nand_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
;   %result = atomicrmw nand ptr addrspace(3) %ptr, i64 4 seq_cst
;   store i64 %result, ptr addrspace(1) %out, align 8
;   ret void
; }
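
; A rough illustration of the FIXME above: since there is no DS nand instruction,
; one plausible expansion is a compare-exchange retry loop that computes
; ~(old & val) and retries until the cmpxchg succeeds. This is only a hand-written
; sketch kept as a comment; block and value names are illustrative, not generated
; output of any pass.
;
; define amdgpu_kernel void @lds_atomic_nand_ret_i64_expanded(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
; entry:
;   %init = load i64, ptr addrspace(3) %ptr, align 8
;   br label %loop
; loop:
;   %loaded = phi i64 [ %init, %entry ], [ %newloaded, %loop ]
;   %tmp = and i64 %loaded, 4
;   %nand = xor i64 %tmp, -1
;   %pair = cmpxchg ptr addrspace(3) %ptr, i64 %loaded, i64 %nand seq_cst seq_cst
;   %newloaded = extractvalue { i64, i1 } %pair, 0
;   %success = extractvalue { i64, i1 } %pair, 1
;   br i1 %success, label %done, label %loop
; done:
;   store i64 %loaded, ptr addrspace(1) %out, align 8
;   ret void
; }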

; GCN-LABEL: {{^}}lds_atomic_min_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw min ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw min ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw max ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw max ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umin ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umin ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umax ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umax ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xchg ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 9 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw and ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw and ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw or ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw or ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xor ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xor ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_noret_i64:
; define amdgpu_kernel void @lds_atomic_nand_noret_i64(ptr addrspace(3) %ptr) nounwind {
;   %result = atomicrmw nand ptr addrspace(3) %ptr, i64 4 seq_cst
;   ret void
; }
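
; The no-return form would presumably be expanded with the same kind of cmpxchg
; retry loop sketched after lds_atomic_nand_ret_i64 above, just dropping the final
; store of the loaded value.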

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw min ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw min ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw max ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw max ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umin ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umin ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umax ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umax ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_inc_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 9 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_inc1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_inc_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_inc1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_dec_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 9 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_dec1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_dec_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_dec1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}