; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s

; Instruction-selection checks for monotonic atomic loads of 8-bit and 16-bit
; values through an addrspace(1) pointer, alone and combined with zero/sign
; extension, bitcasts, shifts and vector inserts. Each function is compiled
; for four subtargets (tahiti, kaveri, fiji, gfx900), one check prefix each.

; Plain i8 load, result returned as i8.
define i8 @atomic_load_global_monotonic_i8(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_i8:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1
  ret i8 %load
}

; i8 load zero-extended to i32; the checks show only an unsigned byte load.
define i32 @atomic_load_global_monotonic_i8_zext_to_i32(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_i8_zext_to_i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_i8_zext_to_i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i8_zext_to_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i8_zext_to_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1
  %ext = zext i8 %load to i32
  ret i32 %ext
}

; i8 load sign-extended to i32; the checks show a signed byte load.
define i32 @atomic_load_global_monotonic_i8_sext_to_i32(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_sbyte v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_sbyte v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_sbyte v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i8_sext_to_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_sbyte v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1
  %ext = sext i8 %load to i32
  ret i32 %ext
}

; i8 load zero-extended to i16.
define i16 @atomic_load_global_monotonic_i8_zext_to_i16(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_i8_zext_to_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_i8_zext_to_i16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i8_zext_to_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i8_zext_to_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ubyte v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1
  %ext = zext i8 %load to i16
  ret i16 %ext
}

; i8 load sign-extended to i16.
define i16 @atomic_load_global_monotonic_i8_sext_to_i16(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_sbyte v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_sbyte v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_sbyte v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i8_sext_to_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_sbyte v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1
  %ext = sext i8 %load to i16
  ret i16 %ext
}

; Plain i16 load, result returned as i16.
define i16 @atomic_load_global_monotonic_i16(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_i16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr addrspace(1) %ptr monotonic, align 2
  ret i16 %load
}

; i16 load zero-extended to i32; the checks show only an unsigned short load.
define i32 @atomic_load_global_monotonic_i16_zext_to_i32(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_i16_zext_to_i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_i16_zext_to_i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i16_zext_to_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i16_zext_to_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr addrspace(1) %ptr monotonic, align 2
  %ext = zext i16 %load to i32
  ret i32 %ext
}

; i16 load sign-extended to i32. The loaded value is 16 bits wide, so all four
; subtargets check for a signed *short* load; a signed byte load would drop
; the upper 8 bits of the value.
define i32 @atomic_load_global_monotonic_i16_sext_to_i32(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_sshort v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_sshort v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_sshort v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i16_sext_to_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_sshort v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr addrspace(1) %ptr monotonic, align 2
  %ext = sext i16 %load to i32
  ret i32 %ext
}

; Atomic load of a half value.
define half @atomic_load_global_monotonic_f16(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic half, ptr addrspace(1) %ptr monotonic, align 2
  ret half %load
}

; Atomic load of a bfloat value.
define bfloat @atomic_load_global_monotonic_bf16(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_bf16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic bfloat, ptr addrspace(1) %ptr monotonic, align 2
  ret bfloat %load
}

; half load, bitcast to i16 and zero-extended to i32.
define i32 @atomic_load_global_monotonic_f16_zext_to_i32(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_f16_zext_to_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic half, ptr addrspace(1) %ptr monotonic, align 2
  %cast = bitcast half %load to i16
  %ext = zext i16 %cast to i32
  ret i32 %ext
}

; bfloat load, bitcast to i16 and zero-extended to i32.
define i32 @atomic_load_global_monotonic_bf16_zext_to_i32(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_bf16_zext_to_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic bfloat, ptr addrspace(1) %ptr monotonic, align 2
  %cast = bitcast bfloat %load to i16
  %ext = zext i16 %cast to i32
  ret i32 %ext
}

; i16 load zero-extended and shifted left by 16, i.e. placed in the high half
; of the i32 result.
define i32 @atomic_load_global_monotonic_i16_d16_hi_shift(ptr addrspace(1) %ptr) {
; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_hi_shift:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_hi_shift:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i16_d16_hi_shift:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i16_d16_hi_shift:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr addrspace(1) %ptr monotonic, align 2
  %ext = zext i16 %load to i32
  %shl = shl i32 %ext, 16
  ret i32 %shl
}

; i16 load inserted into element 1 of an incoming <2 x i16> vector.
define <2 x i16> @atomic_load_global_monotonic_i16_d16_hi_vector_insert(ptr addrspace(1) %ptr, <2 x i16> %vec) {
; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_hi_vector_insert:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_hi_vector_insert:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
; GFX7-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX7-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i16_d16_hi_vector_insert:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i16_d16_hi_vector_insert:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v0, v[0:1], off glc
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffff
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX9-NEXT:    v_and_or_b32 v0, v2, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr addrspace(1) %ptr monotonic, align 2
  %insert = insertelement <2 x i16> %vec, i16 %load, i32 1
  ret <2 x i16> %insert
}

; i16 load zero-extended into the low half of an i32 and OR-ed with %high
; shifted into the high half.
define i32 @atomic_load_global_monotonic_i16_d16_lo_or(ptr addrspace(1) %ptr, i16 %high) {
; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_lo_or:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s6, 0
; GFX6-NEXT:    s_mov_b32 s7, 0x100f000
; GFX6-NEXT:    s_mov_b64 s[4:5], 0
; GFX6-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_lo_or:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: atomic_load_global_monotonic_i16_d16_lo_or:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ushort v0, v[0:1] glc
; GFX8-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: atomic_load_global_monotonic_i16_d16_lo_or:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v0, v[0:1], off glc
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %load = load atomic i16, ptr addrspace(1) %ptr monotonic, align 2
  %ext = zext i16 %load to i32
  %high.ext = zext i16 %high to i32
  %shl = shl i32 %high.ext, 16
  %or = or i32 %shl, %ext
  ret i32 %or
}

58312409024SMatt Arsenaultdefine <2 x i16> @atomic_load_global_monotonic_i16_d16_lo_vector_insert(ptr addrspace(1) %ptr, <2 x i16> %vec) { 58412409024SMatt Arsenault; GFX6-LABEL: atomic_load_global_monotonic_i16_d16_lo_vector_insert: 58512409024SMatt Arsenault; GFX6: ; %bb.0: 58612409024SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 58712409024SMatt Arsenault; GFX6-NEXT: s_mov_b32 s6, 0 58812409024SMatt Arsenault; GFX6-NEXT: s_mov_b32 s7, 0x100f000 58912409024SMatt Arsenault; GFX6-NEXT: s_mov_b64 s[4:5], 0 59012409024SMatt Arsenault; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc 59112409024SMatt Arsenault; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 59212409024SMatt Arsenault; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 59312409024SMatt Arsenault; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 59412409024SMatt Arsenault; GFX6-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 59512409024SMatt Arsenault; GFX6-NEXT: s_waitcnt vmcnt(0) 59612409024SMatt Arsenault; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 59712409024SMatt Arsenault; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 59812409024SMatt Arsenault; GFX6-NEXT: s_setpc_b64 s[30:31] 59912409024SMatt Arsenault; 60012409024SMatt Arsenault; GFX7-LABEL: atomic_load_global_monotonic_i16_d16_lo_vector_insert: 60112409024SMatt Arsenault; GFX7: ; %bb.0: 60212409024SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 60312409024SMatt Arsenault; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc 60412409024SMatt Arsenault; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v3 60512409024SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v2, 
0xffff, v2 60612409024SMatt Arsenault; GFX7-NEXT: v_or_b32_e32 v1, v1, v2 60712409024SMatt Arsenault; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 60812409024SMatt Arsenault; GFX7-NEXT: s_waitcnt vmcnt(0) 60912409024SMatt Arsenault; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 61012409024SMatt Arsenault; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0 61112409024SMatt Arsenault; GFX7-NEXT: s_setpc_b64 s[30:31] 61212409024SMatt Arsenault; 61312409024SMatt Arsenault; GFX8-LABEL: atomic_load_global_monotonic_i16_d16_lo_vector_insert: 61412409024SMatt Arsenault; GFX8: ; %bb.0: 61512409024SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 61612409024SMatt Arsenault; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc 61712409024SMatt Arsenault; GFX8-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 61812409024SMatt Arsenault; GFX8-NEXT: s_waitcnt vmcnt(0) 61912409024SMatt Arsenault; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 62012409024SMatt Arsenault; GFX8-NEXT: s_setpc_b64 s[30:31] 62112409024SMatt Arsenault; 62212409024SMatt Arsenault; GFX9-LABEL: atomic_load_global_monotonic_i16_d16_lo_vector_insert: 62312409024SMatt Arsenault; GFX9: ; %bb.0: 62412409024SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 62512409024SMatt Arsenault; GFX9-NEXT: global_load_ushort v0, v[0:1], off glc 62612409024SMatt Arsenault; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff0000 62712409024SMatt Arsenault; GFX9-NEXT: s_waitcnt vmcnt(0) 62812409024SMatt Arsenault; GFX9-NEXT: v_and_or_b32 v0, v2, v1, v0 62912409024SMatt Arsenault; GFX9-NEXT: s_setpc_b64 s[30:31] 63012409024SMatt Arsenault %load = load atomic i16, ptr addrspace(1) %ptr monotonic, align 2 63112409024SMatt Arsenault %insert = insertelement <2 x i16> %vec, i16 %load, i32 0 63212409024SMatt Arsenault ret <2 x i16> %insert 63312409024SMatt Arsenault} 63412409024SMatt Arsenault;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 63512409024SMatt Arsenault; GCN: {{.*}} 636