; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s

; Test splitting flat instruction offsets into the low and high bits
; when the offset doesn't fit in the offset field.
;
; How to read this file: each function below loads one byte from a global
; (addrspace(1)) pointer at a single constant byte offset. Every -mcpu is run
; twice, once with -global-isel=1 and once with -global-isel=0. The per-target
; prefix without a suffix is shared by both runs; the prefixes with a -GISEL or
; -SDAG suffix appear only where the two runs are expected to differ.
; The assertions are generated; regenerate them with the script named in the
; NOTE line instead of editing them by hand.

; Offset +1: every checked target keeps it in the load's offset field.
define i8 @global_inst_valu_offset_1(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_valu_offset_1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_1:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:1
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 1
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset +2047: every checked target keeps it in the load's offset field.
define i8 @global_inst_valu_offset_11bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_valu_offset_11bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_11bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2047
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_11bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:2047
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 2047
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset +4095: gfx900, gfx1100 and gfx1200 keep it in the offset field.
; gfx1010 does not: the GlobalISel run adds the whole 0xfff to the address,
; while the SelectionDAG run adds 0x800 and leaves 2047 in the offset field.
define i8 @global_inst_valu_offset_12bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_valu_offset_12bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_12bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_12bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_12bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset +8191: only gfx1200 keeps it in the offset field. On the other
; targets the GlobalISel runs add the whole 0x1fff to the address, while the
; SelectionDAG runs add a high part (0x1000 on gfx900/gfx1100, 0x1800 on
; gfx1010) and leave the remainder (4095 or 2047) in the offset field.
define i8 @global_inst_valu_offset_13bit_max(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_13bit_max:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_13bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:8191
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_13bit_max:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_13bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_13bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset +8388607 (0x7fffff): only gfx1200 keeps it in the offset field. On
; the other targets the GlobalISel runs add the whole constant to the address,
; while the SelectionDAG runs add a high part and leave 4095 (gfx900/gfx1100)
; or 2047 (gfx1010) in the offset field.
define i8 @global_inst_valu_offset_24bit_max(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_24bit_max:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_24bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_24bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_24bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:8388607
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_24bit_max:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0
; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_24bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_24bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8388607
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset -2048: every checked target keeps it in the load's offset field.
define i8 @global_inst_valu_offset_neg_11bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-2048
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_neg_11bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:-2048
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -2048
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset -4096: gfx900, gfx1100 and gfx1200 keep it in the offset field;
; gfx1010 adds the constant to the 64-bit address and loads with no offset.
define i8 @global_inst_valu_offset_neg_12bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_neg_12bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:-4096
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset -8192: only gfx1200 keeps it in the offset field; gfx900, gfx1010
; and gfx1100 add the constant to the 64-bit address and load with no offset.
define i8 @global_inst_valu_offset_neg_13bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_neg_13bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:-8192
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset -8388608: only gfx1200 keeps it in the offset field; gfx900, gfx1010
; and gfx1100 add the constant to the 64-bit address and load with no offset.
define i8 @global_inst_valu_offset_neg_24bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_valu_offset_neg_24bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_neg_24bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_neg_24bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_neg_24bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:-8388608
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8388608
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset +4095, the same constant global_inst_valu_offset_12bit_max uses, and
; the same expectations: kept in the offset field on gfx900, gfx1100 and
; gfx1200, added to the address (GlobalISel) or split 0x800 + 2047
; (SelectionDAG) on gfx1010.
define i8 @global_inst_valu_offset_2x_11bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_11bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset +8191, the same constant global_inst_valu_offset_13bit_max uses, and
; the same expectations: only gfx1200 keeps it in the offset field.
define i8 @global_inst_valu_offset_2x_12bit_max(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:8191
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset +16383: only gfx1200 keeps it in the offset field. On the other
; targets the GlobalISel runs add the whole 0x3fff to the address, while the
; SelectionDAG runs add a high part (0x3000 on gfx900/gfx1100, 0x3800 on
; gfx1010) and leave the remainder (4095 or 2047) in the offset field.
define i8 @global_inst_valu_offset_2x_13bit_max(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x3fff, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:16383
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 16383
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset +16777214 (0xfffffe): no checked target keeps the whole constant in
; the offset field. The GlobalISel runs add all of it to the address; the
; SelectionDAG runs add a high part and leave the remainder in the offset
; field (4094 on gfx900/gfx1100, 2046 on gfx1010, 8388606 on gfx1200).
define i8 @global_inst_valu_offset_2x_24bit_max(ptr addrspace(1) %p) {
; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_24bit_max:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffe, v0
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_24bit_max:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffffe, v0
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_24bit_max:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffffe, v0
; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-GISEL-LABEL: global_inst_valu_offset_2x_24bit_max:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffffe, v0
; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_24bit_max:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff000, v0
; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4094
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_24bit_max:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff800, v0
; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2046
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_24bit_max:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff000, v0
; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4094
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: global_inst_valu_offset_2x_24bit_max:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0
; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:8388606
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 16777214
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset -4096, the same constant global_inst_valu_offset_neg_12bit_max uses,
; and the same expectations: kept in the offset field on gfx900, gfx1100 and
; gfx1200, added to the 64-bit address on gfx1010.
define i8 @global_inst_valu_offset_2x_neg_11bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:-4096
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

; Offset -8192, the same constant global_inst_valu_offset_neg_13bit_max uses,
; and the same expectations: only gfx1200 keeps it in the offset field.
define i8 @global_inst_valu_offset_2x_neg_12bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX11-NEXT: global_load_u8 v0, v[0:1], off
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:-8192
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
  %load = load i8, ptr addrspace(1) %gep, align 4
  ret i8 %load
}

define i8 @global_inst_valu_offset_2x_neg_13bit_max(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
; GFX11: ; %bb.0:
814; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 815; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 816; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 817; GFX11-NEXT: global_load_u8 v0, v[0:1], off 818; GFX11-NEXT: s_waitcnt vmcnt(0) 819; GFX11-NEXT: s_setpc_b64 s[30:31] 820; 821; GFX12-LABEL: global_inst_valu_offset_2x_neg_13bit_max: 822; GFX12: ; %bb.0: 823; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 824; GFX12-NEXT: s_wait_expcnt 0x0 825; GFX12-NEXT: s_wait_samplecnt 0x0 826; GFX12-NEXT: s_wait_bvhcnt 0x0 827; GFX12-NEXT: s_wait_kmcnt 0x0 828; GFX12-NEXT: global_load_u8 v0, v[0:1], off offset:-16384 829; GFX12-NEXT: s_wait_loadcnt 0x0 830; GFX12-NEXT: s_setpc_b64 s[30:31] 831 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -16384 832 %load = load i8, ptr addrspace(1) %gep, align 4 833 ret i8 %load 834} 835 836define i8 @global_inst_valu_offset_2x_neg_24bit_max(ptr addrspace(1) %p) { 837; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_neg_24bit_max: 838; GFX9-GISEL: ; %bb.0: 839; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 840; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xff000001, v0 841; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 842; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 843; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 844; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 845; 846; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_neg_24bit_max: 847; GFX10-GISEL: ; %bb.0: 848; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 849; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff000001, v0 850; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 851; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 852; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 853; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 854; 855; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_neg_24bit_max: 856; GFX11-GISEL: ; %bb.0: 857; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 858; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff000001, 
v0 859; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 860; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off 861; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 862; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 863; 864; GFX12-GISEL-LABEL: global_inst_valu_offset_2x_neg_24bit_max: 865; GFX12-GISEL: ; %bb.0: 866; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 867; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 868; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 869; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 870; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 871; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff000001, v0 872; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 873; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 874; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 875; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 876; 877; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_neg_24bit_max: 878; GFX9-SDAG: ; %bb.0: 879; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 880; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xff001000, v0 881; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 882; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-4095 883; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 884; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 885; 886; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_neg_24bit_max: 887; GFX10-SDAG: ; %bb.0: 888; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 889; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff000800, v0 890; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 891; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-2047 892; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 893; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 894; 895; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_neg_24bit_max: 896; GFX11-SDAG: ; %bb.0: 897; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 898; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff001000, v0 899; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 900; GFX11-SDAG-NEXT: global_load_u8 v0, 
v[0:1], off offset:-4095 901; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 902; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 903; 904; GFX12-SDAG-LABEL: global_inst_valu_offset_2x_neg_24bit_max: 905; GFX12-SDAG: ; %bb.0: 906; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 907; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 908; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 909; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 910; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 911; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 912; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 913; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-8388607 914; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 915; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 916 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -16777215 917 %load = load i8, ptr addrspace(1) %gep, align 4 918 ret i8 %load 919} 920 921 922; Fill 11-bit low-bits (1ull << 33) | 2047 923define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) { 924; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0: 925; GFX9-GISEL: ; %bb.0: 926; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 927; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0 928; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 929; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 930; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 931; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 932; 933; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0: 934; GFX10-GISEL: ; %bb.0: 935; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 936; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 937; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 938; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 939; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 940; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 941; 942; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0: 943; GFX11-GISEL: ; %bb.0: 944; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 945; 
GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 946; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 947; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off 948; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 949; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 950; 951; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0: 952; GFX12-GISEL: ; %bb.0: 953; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 954; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 955; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 956; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 957; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 958; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 959; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 960; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 961; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 962; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 963; 964; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0: 965; GFX9-SDAG: ; %bb.0: 966; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 967; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 968; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 969; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 970; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 971; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 972; 973; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0: 974; GFX10-SDAG: ; %bb.0: 975; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 976; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 977; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 978; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 979; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 980; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 981; 982; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0: 983; GFX11-SDAG: ; %bb.0: 984; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 985; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 986; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 987; GFX11-SDAG-NEXT: 
global_load_u8 v0, v[0:1], off offset:2047 988; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 989; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 990; 991; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0: 992; GFX12-SDAG: ; %bb.0: 993; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 994; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 995; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 996; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 997; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 998; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 999; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1000; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2047 1001; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1002; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1003 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936639 1004 %load = load i8, ptr addrspace(1) %gep, align 4 1005 ret i8 %load 1006} 1007 1008; Fill 11-bit low-bits (1ull << 33) | 2048 1009define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) { 1010; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1: 1011; GFX9-GISEL: ; %bb.0: 1012; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1013; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0 1014; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1015; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1016; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1017; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1018; 1019; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1: 1020; GFX10: ; %bb.0: 1021; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1022; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 1023; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1024; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1025; GFX10-NEXT: s_waitcnt vmcnt(0) 1026; GFX10-NEXT: s_setpc_b64 s[30:31] 1027; 1028; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1: 1029; GFX11-GISEL: ; %bb.0: 1030; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1031; GFX11-GISEL-NEXT: 
v_add_co_u32 v0, vcc_lo, 0x800, v0 1032; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1033; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1034; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1035; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1036; 1037; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1: 1038; GFX12-GISEL: ; %bb.0: 1039; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1040; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1041; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1042; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1043; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1044; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 1045; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1046; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1047; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 1048; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1049; 1050; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1: 1051; GFX9-SDAG: ; %bb.0: 1052; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1053; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 1054; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1055; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2048 1056; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1057; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1058; 1059; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1: 1060; GFX11-SDAG: ; %bb.0: 1061; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1062; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1063; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1064; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2048 1065; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1066; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1067; 1068; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1: 1069; GFX12-SDAG: ; %bb.0: 1070; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1071; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1072; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1073; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1074; 
GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1075; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1076; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1077; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2048 1078; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1079; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1080 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936640 1081 %load = load i8, ptr addrspace(1) %gep, align 4 1082 ret i8 %load 1083} 1084 1085; Fill 12-bit low-bits (1ull << 33) | 4095 1086define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) { 1087; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0: 1088; GFX9-GISEL: ; %bb.0: 1089; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1090; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0 1091; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1092; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1093; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1094; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1095; 1096; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0: 1097; GFX10-GISEL: ; %bb.0: 1098; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1099; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1100; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1101; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1102; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 1103; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1104; 1105; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0: 1106; GFX11-GISEL: ; %bb.0: 1107; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1108; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1109; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1110; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1111; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1112; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1113; 1114; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0: 1115; GFX12-GISEL: ; %bb.0: 1116; 
GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1117; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1118; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1119; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1120; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1121; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1122; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1123; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1124; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 1125; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1126; 1127; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0: 1128; GFX9-SDAG: ; %bb.0: 1129; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1130; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 1131; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1132; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 1133; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1134; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1135; 1136; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0: 1137; GFX10-SDAG: ; %bb.0: 1138; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1139; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 1140; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1141; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 1142; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 1143; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 1144; 1145; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0: 1146; GFX11-SDAG: ; %bb.0: 1147; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1148; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1149; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1150; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 1151; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1152; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1153; 1154; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0: 1155; GFX12-SDAG: ; %bb.0: 1156; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1157; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1158; 
GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1159; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1160; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1161; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1162; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1163; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 1164; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1165; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1166 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938687 1167 %load = load i8, ptr addrspace(1) %gep, align 4 1168 ret i8 %load 1169} 1170 1171; Fill 12-bit low-bits (1ull << 33) | 4096 1172define i8 @global_inst_valu_offset_64bit_12bit_split1(ptr addrspace(1) %p) { 1173; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1: 1174; GFX9: ; %bb.0: 1175; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1176; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1177; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1178; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 1179; GFX9-NEXT: s_waitcnt vmcnt(0) 1180; GFX9-NEXT: s_setpc_b64 s[30:31] 1181; 1182; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1: 1183; GFX10: ; %bb.0: 1184; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1185; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1186; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1187; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1188; GFX10-NEXT: s_waitcnt vmcnt(0) 1189; GFX10-NEXT: s_setpc_b64 s[30:31] 1190; 1191; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_split1: 1192; GFX11: ; %bb.0: 1193; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1194; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1195; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1196; GFX11-NEXT: global_load_u8 v0, v[0:1], off 1197; GFX11-NEXT: s_waitcnt vmcnt(0) 1198; GFX11-NEXT: s_setpc_b64 s[30:31] 1199; 1200; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1: 1201; GFX12-GISEL: ; %bb.0: 1202; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1203; 
GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1204; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1205; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1206; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1207; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1208; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1209; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1210; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 1211; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1212; 1213; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1: 1214; GFX12-SDAG: ; %bb.0: 1215; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1216; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1217; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1218; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1219; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1220; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1221; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1222; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4096 1223; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1224; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1225 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938688 1226 %load = load i8, ptr addrspace(1) %gep, align 4 1227 ret i8 %load 1228} 1229 1230; Fill 13-bit low-bits (1ull << 33) | 8191 1231define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) { 1232; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0: 1233; GFX9-GISEL: ; %bb.0: 1234; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1235; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 1236; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1237; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1238; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1239; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1240; 1241; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0: 1242; GFX10-GISEL: ; %bb.0: 1243; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1244; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 1245; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, 
vcc_lo, 2, v1, vcc_lo 1246; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1247; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 1248; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1249; 1250; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0: 1251; GFX11-GISEL: ; %bb.0: 1252; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1253; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 1254; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1255; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1256; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1257; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1258; 1259; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0: 1260; GFX12-GISEL: ; %bb.0: 1261; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1262; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1263; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1264; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1265; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1266; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 1267; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1268; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1269; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 1270; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1271; 1272; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0: 1273; GFX9-SDAG: ; %bb.0: 1274; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1275; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1276; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1277; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4095 1278; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1279; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1280; 1281; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0: 1282; GFX10-SDAG: ; %bb.0: 1283; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1284; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0 1285; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1286; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 
1287; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 1288; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 1289; 1290; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0: 1291; GFX11-SDAG: ; %bb.0: 1292; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1293; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1294; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1295; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 1296; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1297; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1298; 1299; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0: 1300; GFX12-SDAG: ; %bb.0: 1301; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1302; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1303; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1304; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1305; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1306; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1307; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1308; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:8191 1309; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1310; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1311 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942783 1312 %load = load i8, ptr addrspace(1) %gep, align 4 1313 ret i8 %load 1314} 1315 1316; Fill 13-bit low-bits (1ull << 33) | 8192 1317define i8 @global_inst_valu_offset_64bit_13bit_split1(ptr addrspace(1) %p) { 1318; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1: 1319; GFX9: ; %bb.0: 1320; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1321; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1322; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1323; GFX9-NEXT: global_load_ubyte v0, v[0:1], off 1324; GFX9-NEXT: s_waitcnt vmcnt(0) 1325; GFX9-NEXT: s_setpc_b64 s[30:31] 1326; 1327; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1: 1328; GFX10: ; %bb.0: 1329; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1330; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1331; GFX10-NEXT: 
v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1332; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1333; GFX10-NEXT: s_waitcnt vmcnt(0) 1334; GFX10-NEXT: s_setpc_b64 s[30:31] 1335; 1336; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_split1: 1337; GFX11: ; %bb.0: 1338; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1339; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1340; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1341; GFX11-NEXT: global_load_u8 v0, v[0:1], off 1342; GFX11-NEXT: s_waitcnt vmcnt(0) 1343; GFX11-NEXT: s_setpc_b64 s[30:31] 1344; 1345; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1: 1346; GFX12-GISEL: ; %bb.0: 1347; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1348; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1349; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1350; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1351; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1352; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1353; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1354; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1355; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 1356; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1357; 1358; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1: 1359; GFX12-SDAG: ; %bb.0: 1360; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1361; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1362; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1363; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1364; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1365; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1366; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1367; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:8192 1368; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1369; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1370 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942784 1371 %load = load i8, ptr addrspace(1) %gep, align 4 1372 ret i8 %load 1373} 1374 1375; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047 1376define i8 
@global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1) %p) { 1377; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: 1378; GFX9-GISEL: ; %bb.0: 1379; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1380; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1381; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0 1382; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1383; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1384; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1385; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1386; 1387; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: 1388; GFX10-GISEL: ; %bb.0: 1389; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1390; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 1391; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1392; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1393; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 1394; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1395; 1396; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: 1397; GFX11-GISEL: ; %bb.0: 1398; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1399; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 1400; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1401; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1402; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1403; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1404; 1405; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: 1406; GFX12-GISEL: ; %bb.0: 1407; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1408; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1409; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1410; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1411; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1412; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 1413; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1414; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1415; 
GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 1416; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1417; 1418; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: 1419; GFX9-SDAG: ; %bb.0: 1420; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1421; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1422; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1423; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 1424; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-2049 1425; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1426; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1427; 1428; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: 1429; GFX10-SDAG: ; %bb.0: 1430; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1431; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 1432; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1433; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 1434; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 1435; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 1436; 1437; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: 1438; GFX11-SDAG: ; %bb.0: 1439; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1440; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1441; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1442; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-2049 1443; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1444; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1445; 1446; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0: 1447; GFX12-SDAG: ; %bb.0: 1448; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1449; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1450; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1451; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1452; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1453; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1454; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1455; GFX12-SDAG-NEXT: global_load_u8 v0, 
v[0:1], off offset:-8386561 1456; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1457; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1458 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773761 1459 %load = load i8, ptr addrspace(1) %gep, align 4 1460 ret i8 %load 1461} 1462 1463; First offset past the 11-bit low-bits range, negative high bits (1ull << 63) | 2048 1464define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1) %p) { 1465; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: 1466; GFX9-GISEL: ; %bb.0: 1467; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1468; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1469; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0 1470; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1471; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1472; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1473; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1474; 1475; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: 1476; GFX10: ; %bb.0: 1477; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1478; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 1479; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1480; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1481; GFX10-NEXT: s_waitcnt vmcnt(0) 1482; GFX10-NEXT: s_setpc_b64 s[30:31] 1483; 1484; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: 1485; GFX11-GISEL: ; %bb.0: 1486; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1487; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 1488; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1489; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1490; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1491; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1492; 1493; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: 1494; GFX12-GISEL: ; %bb.0: 1495; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1496; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1497;
GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1498; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1499; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1500; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 1501; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1502; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1503; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 1504; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1505; 1506; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: 1507; GFX9-SDAG: ; %bb.0: 1508; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1509; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1510; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1511; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 1512; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048 1513; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1514; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1515; 1516; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: 1517; GFX11-SDAG: ; %bb.0: 1518; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1519; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1520; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1521; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-2048 1522; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1523; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1524; 1525; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1: 1526; GFX12-SDAG: ; %bb.0: 1527; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1528; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1529; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1530; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1531; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1532; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1533; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1534; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-8386560 1535; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1536; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1537 %gep = getelementptr 
i8, ptr addrspace(1) %p, i64 -9223372036854773760 1538 %load = load i8, ptr addrspace(1) %gep, align 4 1539 ret i8 %load 1540} 1541 1542; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095 1543define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1) %p) { 1544; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: 1545; GFX9-GISEL: ; %bb.0: 1546; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1547; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1548; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0 1549; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1550; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1551; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1552; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1553; 1554; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: 1555; GFX10-GISEL: ; %bb.0: 1556; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1557; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1558; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1559; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1560; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 1561; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1562; 1563; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: 1564; GFX11-GISEL: ; %bb.0: 1565; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1566; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1567; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1568; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1569; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1570; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1571; 1572; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: 1573; GFX12-GISEL: ; %bb.0: 1574; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1575; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1576; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1577; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1578; GFX12-GISEL-NEXT: 
s_wait_kmcnt 0x0 1579; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1580; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1581; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1582; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 1583; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1584; 1585; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: 1586; GFX9-SDAG: ; %bb.0: 1587; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1588; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1589; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1590; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 1591; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 1592; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1593; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1594; 1595; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: 1596; GFX10-SDAG: ; %bb.0: 1597; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1598; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1599; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1600; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 1601; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 1602; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 1603; 1604; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: 1605; GFX11-SDAG: ; %bb.0: 1606; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1607; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1608; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1609; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-1 1610; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1611; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1612; 1613; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0: 1614; GFX12-SDAG: ; %bb.0: 1615; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1616; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1617; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1618; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1619; 
GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1620; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1621; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1622; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-8384513 1623; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1624; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1625 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771713 1626 %load = load i8, ptr addrspace(1) %gep, align 4 1627 ret i8 %load 1628} 1629 1630; First offset past the 12-bit low-bits range, negative high bits (1ull << 63) | 4096 1631define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1) %p) { 1632; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: 1633; GFX9-GISEL: ; %bb.0: 1634; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1635; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1636; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1637; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1638; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1639; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1640; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1641; 1642; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: 1643; GFX10: ; %bb.0: 1644; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1645; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1646; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1647; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1648; GFX10-NEXT: s_waitcnt vmcnt(0) 1649; GFX10-NEXT: s_setpc_b64 s[30:31] 1650; 1651; GFX11-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: 1652; GFX11: ; %bb.0: 1653; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1654; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1655; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1656; GFX11-NEXT: global_load_u8 v0, v[0:1], off 1657; GFX11-NEXT: s_waitcnt vmcnt(0) 1658; GFX11-NEXT: s_setpc_b64 s[30:31] 1659; 1660; GFX12-GISEL-LABEL:
global_inst_valu_offset_64bit_12bit_neg_high_split1: 1661; GFX12-GISEL: ; %bb.0: 1662; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1663; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1664; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1665; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1666; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1667; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1668; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1669; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1670; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 1671; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1672; 1673; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: 1674; GFX9-SDAG: ; %bb.0: 1675; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1676; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1677; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1678; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 1679; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off 1680; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1681; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1682; 1683; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1: 1684; GFX12-SDAG: ; %bb.0: 1685; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1686; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1687; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1688; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1689; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1690; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1691; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1692; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-8384512 1693; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1694; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1695 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771712 1696 %load = load i8, ptr addrspace(1) %gep, align 4 1697 ret i8 %load 1698} 1699 1700; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191 1701define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1) %p) { 1702; 
GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: 1703; GFX9-GISEL: ; %bb.0: 1704; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1705; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1706; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 1707; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1708; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1709; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1710; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1711; 1712; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: 1713; GFX10-GISEL: ; %bb.0: 1714; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1715; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 1716; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1717; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1718; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 1719; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] 1720; 1721; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: 1722; GFX11-GISEL: ; %bb.0: 1723; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1724; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 1725; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1726; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1727; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 1728; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1729; 1730; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: 1731; GFX12-GISEL: ; %bb.0: 1732; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1733; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1734; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1735; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1736; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1737; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 1738; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1739; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1740; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 1741; GFX12-GISEL-NEXT: s_setpc_b64 
s[30:31] 1742; 1743; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: 1744; GFX9-SDAG: ; %bb.0: 1745; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1746; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1747; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1748; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 1749; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 1750; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1751; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1752; 1753; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: 1754; GFX10-SDAG: ; %bb.0: 1755; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1756; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1757; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1758; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1 1759; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 1760; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 1761; 1762; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: 1763; GFX11-SDAG: ; %bb.0: 1764; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1765; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1766; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1767; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-1 1768; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1769; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1770; 1771; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0: 1772; GFX12-SDAG: ; %bb.0: 1773; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1774; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1775; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1776; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1777; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1778; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1779; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1780; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-8380417 1781; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1782; 
GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1783 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767617 1784 %load = load i8, ptr addrspace(1) %gep, align 4 1785 ret i8 %load 1786} 1787 1788; First offset past the 13-bit low-bits range, negative high bits (1ull << 63) | 8192 1789define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1) %p) { 1790; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: 1791; GFX9-GISEL: ; %bb.0: 1792; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1793; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1794; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1795; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1796; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off 1797; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) 1798; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1799; 1800; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: 1801; GFX10: ; %bb.0: 1802; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1803; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1804; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1805; GFX10-NEXT: global_load_ubyte v0, v[0:1], off 1806; GFX10-NEXT: s_waitcnt vmcnt(0) 1807; GFX10-NEXT: s_setpc_b64 s[30:31] 1808; 1809; GFX11-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: 1810; GFX11: ; %bb.0: 1811; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1812; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1813; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1814; GFX11-NEXT: global_load_u8 v0, v[0:1], off 1815; GFX11-NEXT: s_waitcnt vmcnt(0) 1816; GFX11-NEXT: s_setpc_b64 s[30:31] 1817; 1818; GFX12-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: 1819; GFX12-GISEL: ; %bb.0: 1820; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1821; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1822; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1823; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1824; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1825;
GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1826; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1827; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off 1828; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 1829; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1830; 1831; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: 1832; GFX9-SDAG: ; %bb.0: 1833; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1834; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1835; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1836; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 1837; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off 1838; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1839; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1840; 1841; GFX12-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1: 1842; GFX12-SDAG: ; %bb.0: 1843; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1844; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1845; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1846; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1847; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1848; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1849; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1850; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-8380416 1851; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 1852; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1853 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767616 1854 %load = load i8, ptr addrspace(1) %gep, align 4 1855 ret i8 %load 1856} 1857 1858define amdgpu_kernel void @global_inst_salu_offset_1(ptr addrspace(1) %p) { 1859; GFX9-LABEL: global_inst_salu_offset_1: 1860; GFX9: ; %bb.0: 1861; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1862; GFX9-NEXT: v_mov_b32_e32 v0, 0 1863; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1864; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:1 glc 1865; GFX9-NEXT: s_waitcnt vmcnt(0) 1866; GFX9-NEXT: global_store_byte v[0:1], v0, off 1867; GFX9-NEXT: s_endpgm 1868; 1869; 
GFX10-LABEL: global_inst_salu_offset_1: 1870; GFX10: ; %bb.0: 1871; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1872; GFX10-NEXT: v_mov_b32_e32 v0, 0 1873; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1874; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:1 glc dlc 1875; GFX10-NEXT: s_waitcnt vmcnt(0) 1876; GFX10-NEXT: global_store_byte v[0:1], v0, off 1877; GFX10-NEXT: s_endpgm 1878; 1879; GFX11-LABEL: global_inst_salu_offset_1: 1880; GFX11: ; %bb.0: 1881; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1882; GFX11-NEXT: v_mov_b32_e32 v0, 0 1883; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1884; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:1 glc dlc 1885; GFX11-NEXT: s_waitcnt vmcnt(0) 1886; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1887; GFX11-NEXT: s_endpgm 1888; 1889; GFX12-LABEL: global_inst_salu_offset_1: 1890; GFX12: ; %bb.0: 1891; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1892; GFX12-NEXT: v_mov_b32_e32 v0, 0 1893; GFX12-NEXT: s_wait_kmcnt 0x0 1894; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:1 scope:SCOPE_SYS 1895; GFX12-NEXT: s_wait_loadcnt 0x0 1896; GFX12-NEXT: global_store_b8 v[0:1], v0, off 1897; GFX12-NEXT: s_endpgm 1898 %gep = getelementptr i8, ptr addrspace(1) %p, i64 1 1899 %load = load volatile i8, ptr addrspace(1) %gep, align 1 1900 store i8 %load, ptr addrspace(1) poison ; placeholder destination; poison replaces the deprecated undef 1901 ret void 1902} 1903 1904define amdgpu_kernel void @global_inst_salu_offset_11bit_max(ptr addrspace(1) %p) { 1905; GFX9-LABEL: global_inst_salu_offset_11bit_max: 1906; GFX9: ; %bb.0: 1907; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1908; GFX9-NEXT: v_mov_b32_e32 v0, 0 1909; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1910; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc 1911; GFX9-NEXT: s_waitcnt vmcnt(0) 1912; GFX9-NEXT: global_store_byte v[0:1], v0, off 1913; GFX9-NEXT: s_endpgm 1914; 1915; GFX10-LABEL: global_inst_salu_offset_11bit_max: 1916; GFX10: ; %bb.0: 1917; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1918; GFX10-NEXT: v_mov_b32_e32 v0, 0 1919; GFX10-NEXT: s_waitcnt
lgkmcnt(0) 1920; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 1921; GFX10-NEXT: s_waitcnt vmcnt(0) 1922; GFX10-NEXT: global_store_byte v[0:1], v0, off 1923; GFX10-NEXT: s_endpgm 1924; 1925; GFX11-LABEL: global_inst_salu_offset_11bit_max: 1926; GFX11: ; %bb.0: 1927; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1928; GFX11-NEXT: v_mov_b32_e32 v0, 0 1929; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1930; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:2047 glc dlc 1931; GFX11-NEXT: s_waitcnt vmcnt(0) 1932; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1933; GFX11-NEXT: s_endpgm 1934; 1935; GFX12-LABEL: global_inst_salu_offset_11bit_max: 1936; GFX12: ; %bb.0: 1937; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1938; GFX12-NEXT: v_mov_b32_e32 v0, 0 1939; GFX12-NEXT: s_wait_kmcnt 0x0 1940; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:2047 scope:SCOPE_SYS 1941; GFX12-NEXT: s_wait_loadcnt 0x0 1942; GFX12-NEXT: global_store_b8 v[0:1], v0, off 1943; GFX12-NEXT: s_endpgm 1944 %gep = getelementptr i8, ptr addrspace(1) %p, i64 2047 1945 %load = load volatile i8, ptr addrspace(1) %gep, align 1 1946 store i8 %load, ptr addrspace(1) poison ; placeholder destination; poison replaces the deprecated undef 1947 ret void 1948} 1949 1950define amdgpu_kernel void @global_inst_salu_offset_12bit_max(ptr addrspace(1) %p) { 1951; GFX9-LABEL: global_inst_salu_offset_12bit_max: 1952; GFX9: ; %bb.0: 1953; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1954; GFX9-NEXT: v_mov_b32_e32 v0, 0 1955; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1956; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc 1957; GFX9-NEXT: s_waitcnt vmcnt(0) 1958; GFX9-NEXT: global_store_byte v[0:1], v0, off 1959; GFX9-NEXT: s_endpgm 1960; 1961; GFX10-LABEL: global_inst_salu_offset_12bit_max: 1962; GFX10: ; %bb.0: 1963; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1964; GFX10-NEXT: v_mov_b32_e32 v0, 0x800 1965; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1966; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 1967; GFX10-NEXT: s_waitcnt vmcnt(0) 1968; GFX10-NEXT:
global_store_byte v[0:1], v0, off 1969; GFX10-NEXT: s_endpgm 1970; 1971; GFX11-LABEL: global_inst_salu_offset_12bit_max: 1972; GFX11: ; %bb.0: 1973; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1974; GFX11-NEXT: v_mov_b32_e32 v0, 0 1975; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1976; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc 1977; GFX11-NEXT: s_waitcnt vmcnt(0) 1978; GFX11-NEXT: global_store_b8 v[0:1], v0, off 1979; GFX11-NEXT: s_endpgm 1980; 1981; GFX12-LABEL: global_inst_salu_offset_12bit_max: 1982; GFX12: ; %bb.0: 1983; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1984; GFX12-NEXT: v_mov_b32_e32 v0, 0 1985; GFX12-NEXT: s_wait_kmcnt 0x0 1986; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 scope:SCOPE_SYS 1987; GFX12-NEXT: s_wait_loadcnt 0x0 1988; GFX12-NEXT: global_store_b8 v[0:1], v0, off 1989; GFX12-NEXT: s_endpgm 1990 %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095 1991 %load = load volatile i8, ptr addrspace(1) %gep, align 1 1992 store i8 %load, ptr addrspace(1) poison ; placeholder destination; poison replaces the deprecated undef 1993 ret void 1994} 1995 1996define amdgpu_kernel void @global_inst_salu_offset_13bit_max(ptr addrspace(1) %p) { 1997; GFX9-LABEL: global_inst_salu_offset_13bit_max: 1998; GFX9: ; %bb.0: 1999; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2000; GFX9-NEXT: v_mov_b32_e32 v0, 0x1000 2001; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2002; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc 2003; GFX9-NEXT: s_waitcnt vmcnt(0) 2004; GFX9-NEXT: global_store_byte v[0:1], v0, off 2005; GFX9-NEXT: s_endpgm 2006; 2007; GFX10-LABEL: global_inst_salu_offset_13bit_max: 2008; GFX10: ; %bb.0: 2009; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2010; GFX10-NEXT: v_mov_b32_e32 v0, 0x1800 2011; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2012; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 2013; GFX10-NEXT: s_waitcnt vmcnt(0) 2014; GFX10-NEXT: global_store_byte v[0:1], v0, off 2015; GFX10-NEXT: s_endpgm 2016; 2017; GFX11-LABEL: global_inst_salu_offset_13bit_max: 2018; GFX11: ; %bb.0:
2019; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2020; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000 2021; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2022; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc 2023; GFX11-NEXT: s_waitcnt vmcnt(0) 2024; GFX11-NEXT: global_store_b8 v[0:1], v0, off 2025; GFX11-NEXT: s_endpgm 2026; 2027; GFX12-LABEL: global_inst_salu_offset_13bit_max: 2028; GFX12: ; %bb.0: 2029; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2030; GFX12-NEXT: v_mov_b32_e32 v0, 0 2031; GFX12-NEXT: s_wait_kmcnt 0x0 2032; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:8191 scope:SCOPE_SYS 2033; GFX12-NEXT: s_wait_loadcnt 0x0 2034; GFX12-NEXT: global_store_b8 v[0:1], v0, off 2035; GFX12-NEXT: s_endpgm 2036 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191 2037 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2038 store i8 %load, ptr addrspace(1) poison ; placeholder destination; poison replaces the deprecated undef 2039 ret void 2040} 2041 2042define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(ptr addrspace(1) %p) { 2043; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max: 2044; GFX9: ; %bb.0: 2045; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2046; GFX9-NEXT: v_mov_b32_e32 v0, 0 2047; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2048; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-2048 glc 2049; GFX9-NEXT: s_waitcnt vmcnt(0) 2050; GFX9-NEXT: global_store_byte v[0:1], v0, off 2051; GFX9-NEXT: s_endpgm 2052; 2053; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max: 2054; GFX10: ; %bb.0: 2055; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2056; GFX10-NEXT: v_mov_b32_e32 v0, 0 2057; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2058; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-2048 glc dlc 2059; GFX10-NEXT: s_waitcnt vmcnt(0) 2060; GFX10-NEXT: global_store_byte v[0:1], v0, off 2061; GFX10-NEXT: s_endpgm 2062; 2063; GFX11-LABEL: global_inst_salu_offset_neg_11bit_max: 2064; GFX11: ; %bb.0: 2065; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2066; GFX11-NEXT: v_mov_b32_e32 v0, 0 2067; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2068; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-2048 glc dlc 2069; GFX11-NEXT: s_waitcnt vmcnt(0) 2070; GFX11-NEXT: global_store_b8 v[0:1], v0, off 2071; GFX11-NEXT: s_endpgm 2072; 2073; GFX12-LABEL: global_inst_salu_offset_neg_11bit_max: 2074; GFX12: ; %bb.0: 2075; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2076; GFX12-NEXT: v_mov_b32_e32 v0, 0 2077; GFX12-NEXT: s_wait_kmcnt 0x0 2078; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:-2048 scope:SCOPE_SYS 2079; GFX12-NEXT: s_wait_loadcnt 0x0 2080; GFX12-NEXT: global_store_b8 v[0:1], v0, off 2081; GFX12-NEXT: s_endpgm 2082 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -2048 2083 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2084 store i8 %load, ptr addrspace(1) poison ; placeholder destination; poison replaces the deprecated undef 2085 ret void 2086} 2087 2088define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(ptr addrspace(1) %p) { 2089; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max: 2090; GFX9: ; %bb.0: 2091; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2092; GFX9-NEXT: v_mov_b32_e32 v0, 0 2093; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2094; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-4096 glc 2095; GFX9-NEXT: s_waitcnt vmcnt(0) 2096; GFX9-NEXT: global_store_byte v[0:1], v0, off 2097; GFX9-NEXT: s_endpgm 2098; 2099; GFX10-GISEL-LABEL: global_inst_salu_offset_neg_12bit_max: 2100; GFX10-GISEL: ; %bb.0: 2101; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2102; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2103; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000 2104; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, -1 2105; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 2106; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 2107; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2108; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 2109; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off 2110; GFX10-GISEL-NEXT: s_endpgm 2111; 2112; GFX11-LABEL: global_inst_salu_offset_neg_12bit_max: 2113; GFX11: ; %bb.0: 2114; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2115; GFX11-NEXT:
v_mov_b32_e32 v0, 0 2116; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2117; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc 2118; GFX11-NEXT: s_waitcnt vmcnt(0) 2119; GFX11-NEXT: global_store_b8 v[0:1], v0, off 2120; GFX11-NEXT: s_endpgm 2121; 2122; GFX12-LABEL: global_inst_salu_offset_neg_12bit_max: 2123; GFX12: ; %bb.0: 2124; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2125; GFX12-NEXT: v_mov_b32_e32 v0, 0 2126; GFX12-NEXT: s_wait_kmcnt 0x0 2127; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 scope:SCOPE_SYS 2128; GFX12-NEXT: s_wait_loadcnt 0x0 2129; GFX12-NEXT: global_store_b8 v[0:1], v0, off 2130; GFX12-NEXT: s_endpgm 2131; 2132; GFX10-SDAG-LABEL: global_inst_salu_offset_neg_12bit_max: 2133; GFX10-SDAG: ; %bb.0: 2134; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2135; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2136; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 2137; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 2138; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2139; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 2140; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off 2141; GFX10-SDAG-NEXT: s_endpgm 2142 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096 2143 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2144 store i8 %load, ptr addrspace(1) undef 2145 ret void 2146} 2147 2148define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(ptr addrspace(1) %p) { 2149; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max: 2150; GFX9: ; %bb.0: 2151; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2152; GFX9-NEXT: v_mov_b32_e32 v0, 0 2153; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2154; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000 2155; GFX9-NEXT: s_addc_u32 s1, s1, -1 2156; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 2157; GFX9-NEXT: s_waitcnt vmcnt(0) 2158; GFX9-NEXT: global_store_byte v[0:1], v0, off 2159; GFX9-NEXT: s_endpgm 2160; 2161; GFX10-GISEL-LABEL: global_inst_salu_offset_neg_13bit_max: 2162; GFX10-GISEL: ; %bb.0: 
2163; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2164; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2165; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000 2166; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, -1 2167; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 2168; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 2169; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2170; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 2171; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off 2172; GFX10-GISEL-NEXT: s_endpgm 2173; 2174; GFX11-GISEL-LABEL: global_inst_salu_offset_neg_13bit_max: 2175; GFX11-GISEL: ; %bb.0: 2176; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2177; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2178; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000 2179; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1 2180; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2181; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2182; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2183; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 2184; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2185; GFX11-GISEL-NEXT: s_endpgm 2186; 2187; GFX12-LABEL: global_inst_salu_offset_neg_13bit_max: 2188; GFX12: ; %bb.0: 2189; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2190; GFX12-NEXT: v_mov_b32_e32 v0, 0 2191; GFX12-NEXT: s_wait_kmcnt 0x0 2192; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:-8192 scope:SCOPE_SYS 2193; GFX12-NEXT: s_wait_loadcnt 0x0 2194; GFX12-NEXT: global_store_b8 v[0:1], v0, off 2195; GFX12-NEXT: s_endpgm 2196; 2197; GFX10-SDAG-LABEL: global_inst_salu_offset_neg_13bit_max: 2198; GFX10-SDAG: ; %bb.0: 2199; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2200; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2201; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 2202; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 2203; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2204; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 2205; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off 
2206; GFX10-SDAG-NEXT: s_endpgm 2207; 2208; GFX11-SDAG-LABEL: global_inst_salu_offset_neg_13bit_max: 2209; GFX11-SDAG: ; %bb.0: 2210; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2211; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2212; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 2213; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2214; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 2215; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2216; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 2217; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off 2218; GFX11-SDAG-NEXT: s_endpgm 2219 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192 2220 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2221 store i8 %load, ptr addrspace(1) undef 2222 ret void 2223} 2224 2225define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(ptr addrspace(1) %p) { 2226; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max: 2227; GFX9: ; %bb.0: 2228; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2229; GFX9-NEXT: v_mov_b32_e32 v0, 0 2230; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2231; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc 2232; GFX9-NEXT: s_waitcnt vmcnt(0) 2233; GFX9-NEXT: global_store_byte v[0:1], v0, off 2234; GFX9-NEXT: s_endpgm 2235; 2236; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max: 2237; GFX10: ; %bb.0: 2238; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2239; GFX10-NEXT: v_mov_b32_e32 v0, 0x800 2240; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2241; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 2242; GFX10-NEXT: s_waitcnt vmcnt(0) 2243; GFX10-NEXT: global_store_byte v[0:1], v0, off 2244; GFX10-NEXT: s_endpgm 2245; 2246; GFX11-LABEL: global_inst_salu_offset_2x_11bit_max: 2247; GFX11: ; %bb.0: 2248; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2249; GFX11-NEXT: v_mov_b32_e32 v0, 0 2250; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2251; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc 2252; GFX11-NEXT: s_waitcnt vmcnt(0) 2253; 
GFX11-NEXT: global_store_b8 v[0:1], v0, off 2254; GFX11-NEXT: s_endpgm 2255; 2256; GFX12-LABEL: global_inst_salu_offset_2x_11bit_max: 2257; GFX12: ; %bb.0: 2258; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2259; GFX12-NEXT: v_mov_b32_e32 v0, 0 2260; GFX12-NEXT: s_wait_kmcnt 0x0 2261; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 scope:SCOPE_SYS 2262; GFX12-NEXT: s_wait_loadcnt 0x0 2263; GFX12-NEXT: global_store_b8 v[0:1], v0, off 2264; GFX12-NEXT: s_endpgm 2265 %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095 2266 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2267 store i8 %load, ptr addrspace(1) undef 2268 ret void 2269} 2270 2271define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(ptr addrspace(1) %p) { 2272; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max: 2273; GFX9: ; %bb.0: 2274; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2275; GFX9-NEXT: v_mov_b32_e32 v0, 0x1000 2276; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2277; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc 2278; GFX9-NEXT: s_waitcnt vmcnt(0) 2279; GFX9-NEXT: global_store_byte v[0:1], v0, off 2280; GFX9-NEXT: s_endpgm 2281; 2282; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max: 2283; GFX10: ; %bb.0: 2284; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2285; GFX10-NEXT: v_mov_b32_e32 v0, 0x1800 2286; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2287; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 2288; GFX10-NEXT: s_waitcnt vmcnt(0) 2289; GFX10-NEXT: global_store_byte v[0:1], v0, off 2290; GFX10-NEXT: s_endpgm 2291; 2292; GFX11-LABEL: global_inst_salu_offset_2x_12bit_max: 2293; GFX11: ; %bb.0: 2294; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2295; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000 2296; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2297; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc 2298; GFX11-NEXT: s_waitcnt vmcnt(0) 2299; GFX11-NEXT: global_store_b8 v[0:1], v0, off 2300; GFX11-NEXT: s_endpgm 2301; 2302; GFX12-LABEL: 
global_inst_salu_offset_2x_12bit_max: 2303; GFX12: ; %bb.0: 2304; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2305; GFX12-NEXT: v_mov_b32_e32 v0, 0 2306; GFX12-NEXT: s_wait_kmcnt 0x0 2307; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:8191 scope:SCOPE_SYS 2308; GFX12-NEXT: s_wait_loadcnt 0x0 2309; GFX12-NEXT: global_store_b8 v[0:1], v0, off 2310; GFX12-NEXT: s_endpgm 2311 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191 2312 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2313 store i8 %load, ptr addrspace(1) undef 2314 ret void 2315} 2316 2317define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(ptr addrspace(1) %p) { 2318; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max: 2319; GFX9: ; %bb.0: 2320; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2321; GFX9-NEXT: v_mov_b32_e32 v0, 0x3000 2322; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2323; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc 2324; GFX9-NEXT: s_waitcnt vmcnt(0) 2325; GFX9-NEXT: global_store_byte v[0:1], v0, off 2326; GFX9-NEXT: s_endpgm 2327; 2328; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max: 2329; GFX10: ; %bb.0: 2330; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2331; GFX10-NEXT: v_mov_b32_e32 v0, 0x3800 2332; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2333; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc 2334; GFX10-NEXT: s_waitcnt vmcnt(0) 2335; GFX10-NEXT: global_store_byte v[0:1], v0, off 2336; GFX10-NEXT: s_endpgm 2337; 2338; GFX11-LABEL: global_inst_salu_offset_2x_13bit_max: 2339; GFX11: ; %bb.0: 2340; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2341; GFX11-NEXT: v_mov_b32_e32 v0, 0x3000 2342; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2343; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc 2344; GFX11-NEXT: s_waitcnt vmcnt(0) 2345; GFX11-NEXT: global_store_b8 v[0:1], v0, off 2346; GFX11-NEXT: s_endpgm 2347; 2348; GFX12-LABEL: global_inst_salu_offset_2x_13bit_max: 2349; GFX12: ; %bb.0: 2350; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2351; 
GFX12-NEXT: v_mov_b32_e32 v0, 0 2352; GFX12-NEXT: s_wait_kmcnt 0x0 2353; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:16383 scope:SCOPE_SYS 2354; GFX12-NEXT: s_wait_loadcnt 0x0 2355; GFX12-NEXT: global_store_b8 v[0:1], v0, off 2356; GFX12-NEXT: s_endpgm 2357 %gep = getelementptr i8, ptr addrspace(1) %p, i64 16383 2358 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2359 store i8 %load, ptr addrspace(1) undef 2360 ret void 2361} 2362 2363define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(ptr addrspace(1) %p) { 2364; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max: 2365; GFX9: ; %bb.0: 2366; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2367; GFX9-NEXT: v_mov_b32_e32 v0, 0 2368; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2369; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-4096 glc 2370; GFX9-NEXT: s_waitcnt vmcnt(0) 2371; GFX9-NEXT: global_store_byte v[0:1], v0, off 2372; GFX9-NEXT: s_endpgm 2373; 2374; GFX10-GISEL-LABEL: global_inst_salu_offset_2x_neg_11bit_max: 2375; GFX10-GISEL: ; %bb.0: 2376; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2377; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2378; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000 2379; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, -1 2380; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 2381; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 2382; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2383; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 2384; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off 2385; GFX10-GISEL-NEXT: s_endpgm 2386; 2387; GFX11-LABEL: global_inst_salu_offset_2x_neg_11bit_max: 2388; GFX11: ; %bb.0: 2389; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2390; GFX11-NEXT: v_mov_b32_e32 v0, 0 2391; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2392; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc 2393; GFX11-NEXT: s_waitcnt vmcnt(0) 2394; GFX11-NEXT: global_store_b8 v[0:1], v0, off 2395; GFX11-NEXT: s_endpgm 2396; 2397; GFX12-LABEL: global_inst_salu_offset_2x_neg_11bit_max: 
2398; GFX12: ; %bb.0: 2399; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2400; GFX12-NEXT: v_mov_b32_e32 v0, 0 2401; GFX12-NEXT: s_wait_kmcnt 0x0 2402; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 scope:SCOPE_SYS 2403; GFX12-NEXT: s_wait_loadcnt 0x0 2404; GFX12-NEXT: global_store_b8 v[0:1], v0, off 2405; GFX12-NEXT: s_endpgm 2406; 2407; GFX10-SDAG-LABEL: global_inst_salu_offset_2x_neg_11bit_max: 2408; GFX10-SDAG: ; %bb.0: 2409; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2410; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2411; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 2412; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 2413; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2414; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 2415; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off 2416; GFX10-SDAG-NEXT: s_endpgm 2417 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096 2418 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2419 store i8 %load, ptr addrspace(1) undef 2420 ret void 2421} 2422 2423define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(ptr addrspace(1) %p) { 2424; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 2425; GFX9: ; %bb.0: 2426; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2427; GFX9-NEXT: v_mov_b32_e32 v0, 0 2428; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2429; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000 2430; GFX9-NEXT: s_addc_u32 s1, s1, -1 2431; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 2432; GFX9-NEXT: s_waitcnt vmcnt(0) 2433; GFX9-NEXT: global_store_byte v[0:1], v0, off 2434; GFX9-NEXT: s_endpgm 2435; 2436; GFX10-GISEL-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 2437; GFX10-GISEL: ; %bb.0: 2438; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2439; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2440; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000 2441; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, -1 2442; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 2443; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 
2444; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2445; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 2446; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off 2447; GFX10-GISEL-NEXT: s_endpgm 2448; 2449; GFX11-GISEL-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 2450; GFX11-GISEL: ; %bb.0: 2451; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2452; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2453; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000 2454; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1 2455; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2456; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2457; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2458; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 2459; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2460; GFX11-GISEL-NEXT: s_endpgm 2461; 2462; GFX12-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 2463; GFX12: ; %bb.0: 2464; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2465; GFX12-NEXT: v_mov_b32_e32 v0, 0 2466; GFX12-NEXT: s_wait_kmcnt 0x0 2467; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:-8192 scope:SCOPE_SYS 2468; GFX12-NEXT: s_wait_loadcnt 0x0 2469; GFX12-NEXT: global_store_b8 v[0:1], v0, off 2470; GFX12-NEXT: s_endpgm 2471; 2472; GFX10-SDAG-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 2473; GFX10-SDAG: ; %bb.0: 2474; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2475; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2476; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 2477; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 2478; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2479; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 2480; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off 2481; GFX10-SDAG-NEXT: s_endpgm 2482; 2483; GFX11-SDAG-LABEL: global_inst_salu_offset_2x_neg_12bit_max: 2484; GFX11-SDAG: ; %bb.0: 2485; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2486; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2487; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 
0xffffe000, s0 2488; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2489; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 2490; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2491; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 2492; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off 2493; GFX11-SDAG-NEXT: s_endpgm 2494 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192 2495 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2496 store i8 %load, ptr addrspace(1) undef 2497 ret void 2498} 2499 2500define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(ptr addrspace(1) %p) { 2501; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 2502; GFX9: ; %bb.0: 2503; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2504; GFX9-NEXT: v_mov_b32_e32 v0, 0 2505; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2506; GFX9-NEXT: s_add_u32 s0, s0, 0xffffc000 2507; GFX9-NEXT: s_addc_u32 s1, s1, -1 2508; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 2509; GFX9-NEXT: s_waitcnt vmcnt(0) 2510; GFX9-NEXT: global_store_byte v[0:1], v0, off 2511; GFX9-NEXT: s_endpgm 2512; 2513; GFX10-GISEL-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 2514; GFX10-GISEL: ; %bb.0: 2515; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2516; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2517; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xffffc000 2518; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, -1 2519; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 2520; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 2521; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2522; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 2523; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off 2524; GFX10-GISEL-NEXT: s_endpgm 2525; 2526; GFX11-GISEL-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 2527; GFX11-GISEL: ; %bb.0: 2528; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2529; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2530; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffc000 2531; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1 2532; GFX11-GISEL-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) 2533; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2534; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2535; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 2536; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2537; GFX11-GISEL-NEXT: s_endpgm 2538; 2539; GFX12-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 2540; GFX12: ; %bb.0: 2541; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2542; GFX12-NEXT: v_mov_b32_e32 v0, 0 2543; GFX12-NEXT: s_wait_kmcnt 0x0 2544; GFX12-NEXT: global_load_u8 v0, v0, s[0:1] offset:-16384 scope:SCOPE_SYS 2545; GFX12-NEXT: s_wait_loadcnt 0x0 2546; GFX12-NEXT: global_store_b8 v[0:1], v0, off 2547; GFX12-NEXT: s_endpgm 2548; 2549; GFX10-SDAG-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 2550; GFX10-SDAG: ; %bb.0: 2551; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2552; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2553; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 2554; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0 2555; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2556; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 2557; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off 2558; GFX10-SDAG-NEXT: s_endpgm 2559; 2560; GFX11-SDAG-LABEL: global_inst_salu_offset_2x_neg_13bit_max: 2561; GFX11-SDAG: ; %bb.0: 2562; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2563; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2564; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 2565; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2566; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 2567; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2568; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 2569; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off 2570; GFX11-SDAG-NEXT: s_endpgm 2571 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -16384 2572 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2573 store i8 %load, ptr addrspace(1) undef 2574 ret void 2575} 2576 2577; Fill 
11-bit low-bits (1ull << 33) | 2047 2578define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(ptr addrspace(1) %p) { 2579; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0: 2580; GFX9: ; %bb.0: 2581; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2582; GFX9-NEXT: v_mov_b32_e32 v0, 0 2583; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2584; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff 2585; GFX9-NEXT: s_addc_u32 s1, s1, 2 2586; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 2587; GFX9-NEXT: s_waitcnt vmcnt(0) 2588; GFX9-NEXT: global_store_byte v[0:1], v0, off 2589; GFX9-NEXT: s_endpgm 2590; 2591; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split0: 2592; GFX10-GISEL: ; %bb.0: 2593; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2594; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2595; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0x7ff 2596; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2 2597; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 2598; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 2599; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2600; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 2601; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off 2602; GFX10-GISEL-NEXT: s_endpgm 2603; 2604; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split0: 2605; GFX11-GISEL: ; %bb.0: 2606; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2607; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2608; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x7ff 2609; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2 2610; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2611; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2612; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2613; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 2614; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2615; GFX11-GISEL-NEXT: s_endpgm 2616; 2617; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split0: 2618; GFX12-GISEL: ; %bb.0: 2619; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2620; GFX12-GISEL-NEXT: s_wait_kmcnt 
0x0 2621; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x7ff 2622; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 2 2623; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2624; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2625; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off scope:SCOPE_SYS 2626; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 2627; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2628; GFX12-GISEL-NEXT: s_endpgm 2629; 2630; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split0: 2631; GFX10-SDAG: ; %bb.0: 2632; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2633; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2634; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2635; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 2636; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc 2637; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 2638; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off 2639; GFX10-SDAG-NEXT: s_endpgm 2640; 2641; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split0: 2642; GFX11-SDAG: ; %bb.0: 2643; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2644; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2645; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2646; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2647; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2648; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2047 glc dlc 2649; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 2650; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off 2651; GFX11-SDAG-NEXT: s_endpgm 2652; 2653; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split0: 2654; GFX12-SDAG: ; %bb.0: 2655; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2656; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 2657; GFX12-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2658; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2659; GFX12-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2660; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2047 scope:SCOPE_SYS 2661; GFX12-SDAG-NEXT: 
s_wait_loadcnt 0x0 2662; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off 2663; GFX12-SDAG-NEXT: s_endpgm 2664 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936639 2665 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2666 store i8 %load, ptr addrspace(1) undef 2667 ret void 2668} 2669 2670; One past the 11-bit low-bits maximum (1ull << 33) | 2048 2671define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(ptr addrspace(1) %p) { 2672; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1: 2673; GFX9: ; %bb.0: 2674; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2675; GFX9-NEXT: v_mov_b32_e32 v0, 0 2676; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2677; GFX9-NEXT: s_add_u32 s0, s0, 0x800 2678; GFX9-NEXT: s_addc_u32 s1, s1, 2 2679; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 2680; GFX9-NEXT: s_waitcnt vmcnt(0) 2681; GFX9-NEXT: global_store_byte v[0:1], v0, off 2682; GFX9-NEXT: s_endpgm 2683; 2684; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split1: 2685; GFX10-GISEL: ; %bb.0: 2686; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2687; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2688; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0x800 2689; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2 2690; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 2691; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 2692; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2693; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 2694; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off 2695; GFX10-GISEL-NEXT: s_endpgm 2696; 2697; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split1: 2698; GFX11-GISEL: ; %bb.0: 2699; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2700; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2701; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x800 2702; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2 2703; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2704; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2705; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2706; 
GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 2707; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2708; GFX11-GISEL-NEXT: s_endpgm 2709; 2710; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split1: 2711; GFX12-GISEL: ; %bb.0: 2712; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2713; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 2714; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800 2715; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 2 2716; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2717; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2718; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off scope:SCOPE_SYS 2719; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 2720; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2721; GFX12-GISEL-NEXT: s_endpgm 2722; 2723; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split1: 2724; GFX10-SDAG: ; %bb.0: 2725; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2726; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2727; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0x800, s0 2728; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 2729; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2730; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 2731; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off 2732; GFX10-SDAG-NEXT: s_endpgm 2733; 2734; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split1: 2735; GFX11-SDAG: ; %bb.0: 2736; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2737; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2738; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2739; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2740; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2741; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2048 glc dlc 2742; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 2743; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off 2744; GFX11-SDAG-NEXT: s_endpgm 2745; 2746; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split1: 2747; GFX12-SDAG: ; %bb.0: 2748; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 
0x24 2749; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 2750; GFX12-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2751; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2752; GFX12-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2753; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2048 scope:SCOPE_SYS 2754; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 2755; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off 2756; GFX12-SDAG-NEXT: s_endpgm 2757 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936640 2758 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2759 store i8 %load, ptr addrspace(1) undef 2760 ret void 2761} 2762 2763; Fill 12-bit low-bits (1ull << 33) | 4095 2764define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(ptr addrspace(1) %p) { 2765; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0: 2766; GFX9: ; %bb.0: 2767; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2768; GFX9-NEXT: v_mov_b32_e32 v0, 0 2769; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2770; GFX9-NEXT: s_add_u32 s0, s0, 0xfff 2771; GFX9-NEXT: s_addc_u32 s1, s1, 2 2772; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 2773; GFX9-NEXT: s_waitcnt vmcnt(0) 2774; GFX9-NEXT: global_store_byte v[0:1], v0, off 2775; GFX9-NEXT: s_endpgm 2776; 2777; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split0: 2778; GFX10-GISEL: ; %bb.0: 2779; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2780; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2781; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xfff 2782; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2 2783; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 2784; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 2785; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2786; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 2787; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off 2788; GFX10-GISEL-NEXT: s_endpgm 2789; 2790; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split0: 2791; GFX11-GISEL: ; %bb.0: 2792; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2793; GFX11-GISEL-NEXT: 
s_waitcnt lgkmcnt(0) 2794; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfff 2795; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2 2796; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2797; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2798; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2799; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 2800; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2801; GFX11-GISEL-NEXT: s_endpgm 2802; 2803; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split0: 2804; GFX12-GISEL: ; %bb.0: 2805; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2806; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 2807; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0xfff 2808; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 2 2809; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2810; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2811; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off scope:SCOPE_SYS 2812; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 2813; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2814; GFX12-GISEL-NEXT: s_endpgm 2815; 2816; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split0: 2817; GFX10-SDAG: ; %bb.0: 2818; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2819; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2820; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0x800, s0 2821; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 2822; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc 2823; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 2824; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off 2825; GFX10-SDAG-NEXT: s_endpgm 2826; 2827; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split0: 2828; GFX11-SDAG: ; %bb.0: 2829; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2830; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2831; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2832; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2833; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2834; GFX11-SDAG-NEXT: 
global_load_u8 v0, v[0:1], off offset:4095 glc dlc 2835; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 2836; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off 2837; GFX11-SDAG-NEXT: s_endpgm 2838; 2839; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split0: 2840; GFX12-SDAG: ; %bb.0: 2841; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2842; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 2843; GFX12-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2844; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2845; GFX12-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2846; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 scope:SCOPE_SYS 2847; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 2848; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off 2849; GFX12-SDAG-NEXT: s_endpgm 2850 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938687 2851 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2852 store i8 %load, ptr addrspace(1) undef 2853 ret void 2854} 2855 2856; One past the 12-bit low-bits maximum (1ull << 33) | 4096 2857define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(ptr addrspace(1) %p) { 2858; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1: 2859; GFX9: ; %bb.0: 2860; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2861; GFX9-NEXT: v_mov_b32_e32 v0, 0 2862; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2863; GFX9-NEXT: s_add_u32 s0, s0, 0x1000 2864; GFX9-NEXT: s_addc_u32 s1, s1, 2 2865; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 2866; GFX9-NEXT: s_waitcnt vmcnt(0) 2867; GFX9-NEXT: global_store_byte v[0:1], v0, off 2868; GFX9-NEXT: s_endpgm 2869; 2870; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split1: 2871; GFX10-GISEL: ; %bb.0: 2872; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2873; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2874; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0x1000 2875; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2 2876; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 2877; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 2878; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], 
off glc dlc 2879; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 2880; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off 2881; GFX10-GISEL-NEXT: s_endpgm 2882; 2883; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split1: 2884; GFX11-GISEL: ; %bb.0: 2885; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2886; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2887; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1000 2888; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2 2889; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2890; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2891; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2892; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 2893; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2894; GFX11-GISEL-NEXT: s_endpgm 2895; 2896; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split1: 2897; GFX12-GISEL: ; %bb.0: 2898; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2899; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 2900; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x1000 2901; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 2 2902; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2903; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2904; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off scope:SCOPE_SYS 2905; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 2906; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2907; GFX12-GISEL-NEXT: s_endpgm 2908; 2909; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split1: 2910; GFX10-SDAG: ; %bb.0: 2911; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2912; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2913; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 2914; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 2915; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2916; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 2917; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off 2918; GFX10-SDAG-NEXT: s_endpgm 2919; 2920; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split1: 2921; 
GFX11-SDAG: ; %bb.0: 2922; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2923; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2924; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 2925; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2926; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2927; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2928; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 2929; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off 2930; GFX11-SDAG-NEXT: s_endpgm 2931; 2932; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split1: 2933; GFX12-SDAG: ; %bb.0: 2934; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2935; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 2936; GFX12-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2937; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2938; GFX12-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2939; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4096 scope:SCOPE_SYS 2940; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 2941; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off 2942; GFX12-SDAG-NEXT: s_endpgm 2943 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938688 2944 %load = load volatile i8, ptr addrspace(1) %gep, align 1 2945 store i8 %load, ptr addrspace(1) undef 2946 ret void 2947} 2948 2949; Fill 13-bit low-bits (1ull << 33) | 8191 2950define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(ptr addrspace(1) %p) { 2951; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0: 2952; GFX9: ; %bb.0: 2953; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2954; GFX9-NEXT: v_mov_b32_e32 v0, 0 2955; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2956; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff 2957; GFX9-NEXT: s_addc_u32 s1, s1, 2 2958; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 2959; GFX9-NEXT: s_waitcnt vmcnt(0) 2960; GFX9-NEXT: global_store_byte v[0:1], v0, off 2961; GFX9-NEXT: s_endpgm 2962; 2963; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split0: 2964; GFX10-GISEL: ; %bb.0: 2965; GFX10-GISEL-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x24 2966; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2967; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff 2968; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2 2969; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 2970; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 2971; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 2972; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 2973; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off 2974; GFX10-GISEL-NEXT: s_endpgm 2975; 2976; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split0: 2977; GFX11-GISEL: ; %bb.0: 2978; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2979; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2980; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff 2981; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2 2982; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2983; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2984; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc 2985; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 2986; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off 2987; GFX11-GISEL-NEXT: s_endpgm 2988; 2989; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split0: 2990; GFX12-GISEL: ; %bb.0: 2991; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2992; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 2993; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x1fff 2994; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 2 2995; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2996; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2997; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off scope:SCOPE_SYS 2998; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 2999; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off 3000; GFX12-GISEL-NEXT: s_endpgm 3001; 3002; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split0: 3003; GFX10-SDAG: ; %bb.0: 3004; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3005; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3006; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1800, s0 3007; GFX10-SDAG-NEXT: 
v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 3008; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc 3009; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 3010; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off 3011; GFX10-SDAG-NEXT: s_endpgm 3012; 3013; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split0: 3014; GFX11-SDAG: ; %bb.0: 3015; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3016; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3017; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 3018; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 3019; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 3020; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 glc dlc 3021; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 3022; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off 3023; GFX11-SDAG-NEXT: s_endpgm 3024; 3025; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split0: 3026; GFX12-SDAG: ; %bb.0: 3027; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3028; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 3029; GFX12-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 3030; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 3031; GFX12-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 3032; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:8191 scope:SCOPE_SYS 3033; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 3034; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off 3035; GFX12-SDAG-NEXT: s_endpgm 3036 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942783 3037 %load = load volatile i8, ptr addrspace(1) %gep, align 1 3038 store i8 %load, ptr addrspace(1) undef 3039 ret void 3040} 3041 3042; Fill 13-bit low-bits (1ull << 33) | 8192 3043define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(ptr addrspace(1) %p) { 3044; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1: 3045; GFX9: ; %bb.0: 3046; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3047; GFX9-NEXT: v_mov_b32_e32 v0, 0 3048; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3049; GFX9-NEXT: s_add_u32 s0, s0, 0x2000 3050; 
GFX9-NEXT: s_addc_u32 s1, s1, 2 3051; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 3052; GFX9-NEXT: s_waitcnt vmcnt(0) 3053; GFX9-NEXT: global_store_byte v[0:1], v0, off 3054; GFX9-NEXT: s_endpgm 3055; 3056; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split1: 3057; GFX10-GISEL: ; %bb.0: 3058; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3059; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3060; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0x2000 3061; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2 3062; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0 3063; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1 3064; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 3065; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) 3066; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off 3067; GFX10-GISEL-NEXT: s_endpgm 3068; 3069; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split1: 3070; GFX11-GISEL: ; %bb.0: 3071; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3072; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 3073; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x2000 3074; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2 3075; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3076; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 3077; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc 3078; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 3079; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off 3080; GFX11-GISEL-NEXT: s_endpgm 3081; 3082; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split1: 3083; GFX12-GISEL: ; %bb.0: 3084; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3085; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 3086; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x2000 3087; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 2 3088; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3089; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 3090; GFX12-GISEL-NEXT: global_load_u8 v0, v[0:1], off scope:SCOPE_SYS 3091; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 3092; GFX12-GISEL-NEXT: global_store_b8 
v[0:1], v0, off 3093; GFX12-GISEL-NEXT: s_endpgm 3094; 3095; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split1: 3096; GFX10-SDAG: ; %bb.0: 3097; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3098; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3099; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 3100; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0 3101; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc 3102; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) 3103; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off 3104; GFX10-SDAG-NEXT: s_endpgm 3105; 3106; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split1: 3107; GFX11-SDAG: ; %bb.0: 3108; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3109; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 3110; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x2000, s0 3111; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 3112; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 3113; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc 3114; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 3115; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off 3116; GFX11-SDAG-NEXT: s_endpgm 3117; 3118; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split1: 3119; GFX12-SDAG: ; %bb.0: 3120; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3121; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 3122; GFX12-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 3123; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 3124; GFX12-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 3125; GFX12-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:8192 scope:SCOPE_SYS 3126; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 3127; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off 3128; GFX12-SDAG-NEXT: s_endpgm 3129 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942784 3130 %load = load volatile i8, ptr addrspace(1) %gep, align 1 3131 store i8 %load, ptr addrspace(1) undef 3132 ret void 3133} 3134 3135; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047 3136define amdgpu_kernel void 
@global_inst_salu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1) %p) { 3137; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: 3138; GFX9: ; %bb.0: 3139; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3140; GFX9-NEXT: v_mov_b32_e32 v0, 0 3141; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3142; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff 3143; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 3144; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 3145; GFX9-NEXT: s_waitcnt vmcnt(0) 3146; GFX9-NEXT: global_store_byte v[0:1], v0, off 3147; GFX9-NEXT: s_endpgm 3148; 3149; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: 3150; GFX10: ; %bb.0: 3151; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3152; GFX10-NEXT: v_mov_b32_e32 v0, 0 3153; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3154; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff 3155; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 3156; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc 3157; GFX10-NEXT: s_waitcnt vmcnt(0) 3158; GFX10-NEXT: global_store_byte v[0:1], v0, off 3159; GFX10-NEXT: s_endpgm 3160; 3161; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: 3162; GFX11: ; %bb.0: 3163; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3164; GFX11-NEXT: v_mov_b32_e32 v0, 0 3165; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3166; GFX11-NEXT: s_add_u32 s0, s0, 0x7ff 3167; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000 3168; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc 3169; GFX11-NEXT: s_waitcnt vmcnt(0) 3170; GFX11-NEXT: global_store_b8 v[0:1], v0, off 3171; GFX11-NEXT: s_endpgm 3172; 3173; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: 3174; GFX12-GISEL: ; %bb.0: 3175; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3176; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0 3177; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 3178; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x7ff 3179; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0x80000000 3180; GFX12-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS 3181; GFX12-GISEL-NEXT: 
s_wait_loadcnt 0x0 3182; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off 3183; GFX12-GISEL-NEXT: s_endpgm 3184; 3185; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0: 3186; GFX12-SDAG: ; %bb.0: 3187; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3188; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x7ff 3189; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 3190; GFX12-SDAG-NEXT: s_brev_b32 s3, 1 3191; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 3192; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] 3193; GFX12-SDAG-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS 3194; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 3195; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off 3196; GFX12-SDAG-NEXT: s_endpgm 3197 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773761 3198 %load = load volatile i8, ptr addrspace(1) %gep, align 1 3199 store i8 %load, ptr addrspace(1) undef 3200 ret void 3201} 3202 3203; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048 3204define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1) %p) { 3205; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: 3206; GFX9: ; %bb.0: 3207; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3208; GFX9-NEXT: v_mov_b32_e32 v0, 0 3209; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3210; GFX9-NEXT: s_add_u32 s0, s0, 0x800 3211; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 3212; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 3213; GFX9-NEXT: s_waitcnt vmcnt(0) 3214; GFX9-NEXT: global_store_byte v[0:1], v0, off 3215; GFX9-NEXT: s_endpgm 3216; 3217; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: 3218; GFX10: ; %bb.0: 3219; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3220; GFX10-NEXT: v_mov_b32_e32 v0, 0 3221; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3222; GFX10-NEXT: s_add_u32 s0, s0, 0x800 3223; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 3224; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc 3225; GFX10-NEXT: s_waitcnt vmcnt(0) 3226; GFX10-NEXT: 
global_store_byte v[0:1], v0, off 3227; GFX10-NEXT: s_endpgm 3228; 3229; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: 3230; GFX11: ; %bb.0: 3231; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3232; GFX11-NEXT: v_mov_b32_e32 v0, 0 3233; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3234; GFX11-NEXT: s_add_u32 s0, s0, 0x800 3235; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000 3236; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc 3237; GFX11-NEXT: s_waitcnt vmcnt(0) 3238; GFX11-NEXT: global_store_b8 v[0:1], v0, off 3239; GFX11-NEXT: s_endpgm 3240; 3241; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: 3242; GFX12-GISEL: ; %bb.0: 3243; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3244; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0 3245; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 3246; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800 3247; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0x80000000 3248; GFX12-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS 3249; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 3250; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off 3251; GFX12-GISEL-NEXT: s_endpgm 3252; 3253; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1: 3254; GFX12-SDAG: ; %bb.0: 3255; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3256; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x800 3257; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 3258; GFX12-SDAG-NEXT: s_brev_b32 s3, 1 3259; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 3260; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] 3261; GFX12-SDAG-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS 3262; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 3263; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off 3264; GFX12-SDAG-NEXT: s_endpgm 3265 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773760 3266 %load = load volatile i8, ptr addrspace(1) %gep, align 1 3267 store i8 %load, ptr addrspace(1) undef 3268 ret void 3269} 3270 3271; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095 3272define 
amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1) %p) { 3273; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: 3274; GFX9: ; %bb.0: 3275; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3276; GFX9-NEXT: v_mov_b32_e32 v0, 0 3277; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3278; GFX9-NEXT: s_add_u32 s0, s0, 0xfff 3279; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 3280; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 3281; GFX9-NEXT: s_waitcnt vmcnt(0) 3282; GFX9-NEXT: global_store_byte v[0:1], v0, off 3283; GFX9-NEXT: s_endpgm 3284; 3285; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: 3286; GFX10: ; %bb.0: 3287; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3288; GFX10-NEXT: v_mov_b32_e32 v0, 0 3289; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3290; GFX10-NEXT: s_add_u32 s0, s0, 0xfff 3291; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 3292; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc 3293; GFX10-NEXT: s_waitcnt vmcnt(0) 3294; GFX10-NEXT: global_store_byte v[0:1], v0, off 3295; GFX10-NEXT: s_endpgm 3296; 3297; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: 3298; GFX11: ; %bb.0: 3299; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3300; GFX11-NEXT: v_mov_b32_e32 v0, 0 3301; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3302; GFX11-NEXT: s_add_u32 s0, s0, 0xfff 3303; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000 3304; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc 3305; GFX11-NEXT: s_waitcnt vmcnt(0) 3306; GFX11-NEXT: global_store_b8 v[0:1], v0, off 3307; GFX11-NEXT: s_endpgm 3308; 3309; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: 3310; GFX12-GISEL: ; %bb.0: 3311; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3312; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0 3313; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 3314; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0xfff 3315; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0x80000000 3316; GFX12-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS 3317; 
GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 3318; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off 3319; GFX12-GISEL-NEXT: s_endpgm 3320; 3321; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0: 3322; GFX12-SDAG: ; %bb.0: 3323; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3324; GFX12-SDAG-NEXT: s_movk_i32 s2, 0xfff 3325; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 3326; GFX12-SDAG-NEXT: s_brev_b32 s3, 1 3327; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 3328; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] 3329; GFX12-SDAG-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS 3330; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 3331; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off 3332; GFX12-SDAG-NEXT: s_endpgm 3333 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771713 3334 %load = load volatile i8, ptr addrspace(1) %gep, align 1 3335 store i8 %load, ptr addrspace(1) undef 3336 ret void 3337} 3338 3339; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096 3340define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1) %p) { 3341; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: 3342; GFX9: ; %bb.0: 3343; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3344; GFX9-NEXT: v_mov_b32_e32 v0, 0 3345; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3346; GFX9-NEXT: s_add_u32 s0, s0, 0x1000 3347; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000 3348; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc 3349; GFX9-NEXT: s_waitcnt vmcnt(0) 3350; GFX9-NEXT: global_store_byte v[0:1], v0, off 3351; GFX9-NEXT: s_endpgm 3352; 3353; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: 3354; GFX10: ; %bb.0: 3355; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3356; GFX10-NEXT: v_mov_b32_e32 v0, 0 3357; GFX10-NEXT: s_waitcnt lgkmcnt(0) 3358; GFX10-NEXT: s_add_u32 s0, s0, 0x1000 3359; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000 3360; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc 3361; GFX10-NEXT: s_waitcnt vmcnt(0) 3362; 
GFX10-NEXT: global_store_byte v[0:1], v0, off 3363; GFX10-NEXT: s_endpgm 3364; 3365; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: 3366; GFX11: ; %bb.0: 3367; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3368; GFX11-NEXT: v_mov_b32_e32 v0, 0 3369; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3370; GFX11-NEXT: s_add_u32 s0, s0, 0x1000 3371; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000 3372; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc 3373; GFX11-NEXT: s_waitcnt vmcnt(0) 3374; GFX11-NEXT: global_store_b8 v[0:1], v0, off 3375; GFX11-NEXT: s_endpgm 3376; 3377; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: 3378; GFX12-GISEL: ; %bb.0: 3379; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3380; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0 3381; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 3382; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x1000 3383; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0x80000000 3384; GFX12-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS 3385; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0 3386; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off 3387; GFX12-GISEL-NEXT: s_endpgm 3388; 3389; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1: 3390; GFX12-SDAG: ; %bb.0: 3391; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 3392; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x1000 3393; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0 3394; GFX12-SDAG-NEXT: s_brev_b32 s3, 1 3395; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 3396; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] 3397; GFX12-SDAG-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS 3398; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0 3399; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off 3400; GFX12-SDAG-NEXT: s_endpgm 3401 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771712 3402 %load = load volatile i8, ptr addrspace(1) %gep, align 1 3403 store i8 %load, ptr addrspace(1) undef 3404 ret void 3405} 3406 3407; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191 
; Kernel: the kernel-argument pointer %p is advanced by the constant byte
; offset 0x8000000000001fff (bit 63 set, low half 0x1fff). One byte is read
; with a volatile load at that address and then written through an undef
; global pointer.
;
; Per the assertions below, no part of the constant ends up in the load's
; immediate offset field on any target. gfx900, gfx1010, gfx1100 and
; GlobalISel on gfx1200 add it as two 32-bit scalar halves (0x1fff and
; 0x80000000); SelectionDAG on gfx1200 builds it in s[2:3] and performs a
; single s_add_nc_u64.
;
; The assertions are produced by utils/update_llc_test_checks.py; rerun the
; script instead of editing them by hand.
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_add_u32 s0, s0, 0x1fff
; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
; GFX11-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x1fff
; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0x80000000
; GFX12-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off
; GFX12-GISEL-NEXT: s_endpgm
;
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x1fff
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX12-SDAG-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off
; GFX12-SDAG-NEXT: s_endpgm
  %addr = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767617
  %byte = load volatile i8, ptr addrspace(1) %addr, align 1
  store i8 %byte, ptr addrspace(1) undef
  ret void
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; The byte offset is 0x8000000000002000, so the low half is one past the
; largest 13-bit value. The kernel has the same load/store shape as the one
; above, and the assertions differ from it only in the low-half constant
; (0x2000 in place of 0x1fff).
define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1) %p) {
; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_add_u32 s0, s0, 0x2000
; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_byte v[0:1], v0, off
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_add_u32 s0, s0, 0x2000
; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_byte v[0:1], v0, off
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_add_u32 s0, s0, 0x2000
; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000
; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b8 v[0:1], v0, off
; GFX11-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x2000
; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0x80000000
; GFX12-GISEL-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_store_b8 v[0:1], v0, off
; GFX12-GISEL-NEXT: s_endpgm
;
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x2000
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX12-SDAG-NEXT: global_load_u8 v0, v0, s[0:1] scope:SCOPE_SYS
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_store_b8 v[0:1], v0, off
; GFX12-SDAG-NEXT: s_endpgm
  %addr = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767616
  %byte = load volatile i8, ptr addrspace(1) %addr, align 1
  store i8 %byte, ptr addrspace(1) undef
  ret void
}