1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s 3; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s 4; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s 5; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s 6; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s 7; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s 8; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s 9; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s 10 11; Test splitting flat instruction offsets into the low and high bits 12; when the offset doesn't fit in the offset field. 
13 14define i8 @flat_inst_valu_offset_1(ptr %p) { 15; GFX9-LABEL: flat_inst_valu_offset_1: 16; GFX9: ; %bb.0: 17; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:1 19; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 20; GFX9-NEXT: s_setpc_b64 s[30:31] 21; 22; GFX10-LABEL: flat_inst_valu_offset_1: 23; GFX10: ; %bb.0: 24; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 25; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1 26; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 27; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 28; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 29; GFX10-NEXT: s_setpc_b64 s[30:31] 30; 31; GFX11-LABEL: flat_inst_valu_offset_1: 32; GFX11: ; %bb.0: 33; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 34; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:1 35; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 36; GFX11-NEXT: s_setpc_b64 s[30:31] 37; 38; GFX12-LABEL: flat_inst_valu_offset_1: 39; GFX12: ; %bb.0: 40; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 41; GFX12-NEXT: s_wait_expcnt 0x0 42; GFX12-NEXT: s_wait_samplecnt 0x0 43; GFX12-NEXT: s_wait_bvhcnt 0x0 44; GFX12-NEXT: s_wait_kmcnt 0x0 45; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:1 46; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 47; GFX12-NEXT: s_setpc_b64 s[30:31] 48 %gep = getelementptr i8, ptr %p, i64 1 49 %load = load i8, ptr %gep, align 4 50 ret i8 %load 51} 52 53define i8 @flat_inst_valu_offset_11bit_max(ptr %p) { 54; GFX9-LABEL: flat_inst_valu_offset_11bit_max: 55; GFX9: ; %bb.0: 56; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 57; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 58; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 59; GFX9-NEXT: s_setpc_b64 s[30:31] 60; 61; GFX10-LABEL: flat_inst_valu_offset_11bit_max: 62; GFX10: ; %bb.0: 63; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 64; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 65; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 66; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 67; GFX10-NEXT: 
s_waitcnt vmcnt(0) lgkmcnt(0) 68; GFX10-NEXT: s_setpc_b64 s[30:31] 69; 70; GFX11-LABEL: flat_inst_valu_offset_11bit_max: 71; GFX11: ; %bb.0: 72; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 73; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 74; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 75; GFX11-NEXT: s_setpc_b64 s[30:31] 76; 77; GFX12-LABEL: flat_inst_valu_offset_11bit_max: 78; GFX12: ; %bb.0: 79; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 80; GFX12-NEXT: s_wait_expcnt 0x0 81; GFX12-NEXT: s_wait_samplecnt 0x0 82; GFX12-NEXT: s_wait_bvhcnt 0x0 83; GFX12-NEXT: s_wait_kmcnt 0x0 84; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:2047 85; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 86; GFX12-NEXT: s_setpc_b64 s[30:31] 87 %gep = getelementptr i8, ptr %p, i64 2047 88 %load = load i8, ptr %gep, align 4 89 ret i8 %load 90} 91 92define i8 @flat_inst_valu_offset_12bit_max(ptr %p) { 93; GFX9-LABEL: flat_inst_valu_offset_12bit_max: 94; GFX9: ; %bb.0: 95; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 96; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 97; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 98; GFX9-NEXT: s_setpc_b64 s[30:31] 99; 100; GFX10-LABEL: flat_inst_valu_offset_12bit_max: 101; GFX10: ; %bb.0: 102; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 103; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 104; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 105; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 106; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 107; GFX10-NEXT: s_setpc_b64 s[30:31] 108; 109; GFX11-LABEL: flat_inst_valu_offset_12bit_max: 110; GFX11: ; %bb.0: 111; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 112; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 113; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 114; GFX11-NEXT: s_setpc_b64 s[30:31] 115; 116; GFX12-LABEL: flat_inst_valu_offset_12bit_max: 117; GFX12: ; %bb.0: 118; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 119; GFX12-NEXT: s_wait_expcnt 0x0 120; GFX12-NEXT: s_wait_samplecnt 0x0 121; GFX12-NEXT: 
s_wait_bvhcnt 0x0 122; GFX12-NEXT: s_wait_kmcnt 0x0 123; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:4095 124; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 125; GFX12-NEXT: s_setpc_b64 s[30:31] 126 %gep = getelementptr i8, ptr %p, i64 4095 127 %load = load i8, ptr %gep, align 4 128 ret i8 %load 129} 130 131define i8 @flat_inst_valu_offset_13bit_max(ptr %p) { 132; GFX9-SDAG-LABEL: flat_inst_valu_offset_13bit_max: 133; GFX9-SDAG: ; %bb.0: 134; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 135; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 136; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 137; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 138; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 139; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 140; 141; GFX10-LABEL: flat_inst_valu_offset_13bit_max: 142; GFX10: ; %bb.0: 143; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 144; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 145; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 146; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 147; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 148; GFX10-NEXT: s_setpc_b64 s[30:31] 149; 150; GFX11-SDAG-LABEL: flat_inst_valu_offset_13bit_max: 151; GFX11-SDAG: ; %bb.0: 152; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 153; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 154; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 155; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 156; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 157; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 158; 159; GFX12-LABEL: flat_inst_valu_offset_13bit_max: 160; GFX12: ; %bb.0: 161; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 162; GFX12-NEXT: s_wait_expcnt 0x0 163; GFX12-NEXT: s_wait_samplecnt 0x0 164; GFX12-NEXT: s_wait_bvhcnt 0x0 165; GFX12-NEXT: s_wait_kmcnt 0x0 166; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:8191 167; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 168; GFX12-NEXT: s_setpc_b64 s[30:31] 169; 170; GFX9-GISEL-LABEL: 
flat_inst_valu_offset_13bit_max: 171; GFX9-GISEL: ; %bb.0: 172; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 173; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 174; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 175; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 176; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 177; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 178; 179; GFX11-GISEL-LABEL: flat_inst_valu_offset_13bit_max: 180; GFX11-GISEL: ; %bb.0: 181; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 182; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 183; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 184; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] 185; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 186; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 187 %gep = getelementptr i8, ptr %p, i64 8191 188 %load = load i8, ptr %gep, align 4 189 ret i8 %load 190} 191 192define i8 @flat_inst_valu_offset_24bit_max(ptr %p) { 193; GFX9-SDAG-LABEL: flat_inst_valu_offset_24bit_max: 194; GFX9-SDAG: ; %bb.0: 195; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 196; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff000, v0 197; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 198; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 199; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 200; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 201; 202; GFX10-LABEL: flat_inst_valu_offset_24bit_max: 203; GFX10: ; %bb.0: 204; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 205; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0 206; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 207; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 208; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 209; GFX10-NEXT: s_setpc_b64 s[30:31] 210; 211; GFX11-SDAG-LABEL: flat_inst_valu_offset_24bit_max: 212; GFX11-SDAG: ; %bb.0: 213; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 214; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff000, v0 215; GFX11-SDAG-NEXT: 
v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 216; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 217; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 218; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 219; 220; GFX12-LABEL: flat_inst_valu_offset_24bit_max: 221; GFX12: ; %bb.0: 222; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 223; GFX12-NEXT: s_wait_expcnt 0x0 224; GFX12-NEXT: s_wait_samplecnt 0x0 225; GFX12-NEXT: s_wait_bvhcnt 0x0 226; GFX12-NEXT: s_wait_kmcnt 0x0 227; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:8388607 228; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 229; GFX12-NEXT: s_setpc_b64 s[30:31] 230; 231; GFX9-GISEL-LABEL: flat_inst_valu_offset_24bit_max: 232; GFX9-GISEL: ; %bb.0: 233; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 234; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7fffff, v0 235; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 236; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 237; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 238; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 239; 240; GFX11-GISEL-LABEL: flat_inst_valu_offset_24bit_max: 241; GFX11-GISEL: ; %bb.0: 242; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 243; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7fffff, v0 244; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 245; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] 246; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 247; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 248 %gep = getelementptr i8, ptr %p, i64 8388607 249 %load = load i8, ptr %gep, align 4 250 ret i8 %load 251} 252 253define i8 @flat_inst_valu_offset_neg_11bit_max(ptr %p) { 254; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max: 255; GFX9: ; %bb.0: 256; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 257; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 258; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 259; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 260; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 261; GFX9-NEXT: s_setpc_b64 s[30:31] 262; 263; 
GFX10-LABEL: flat_inst_valu_offset_neg_11bit_max: 264; GFX10: ; %bb.0: 265; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 266; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 267; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 268; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 269; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 270; GFX10-NEXT: s_setpc_b64 s[30:31] 271; 272; GFX11-LABEL: flat_inst_valu_offset_neg_11bit_max: 273; GFX11: ; %bb.0: 274; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 275; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0 276; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 277; GFX11-NEXT: flat_load_u8 v0, v[0:1] 278; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 279; GFX11-NEXT: s_setpc_b64 s[30:31] 280; 281; GFX12-LABEL: flat_inst_valu_offset_neg_11bit_max: 282; GFX12: ; %bb.0: 283; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 284; GFX12-NEXT: s_wait_expcnt 0x0 285; GFX12-NEXT: s_wait_samplecnt 0x0 286; GFX12-NEXT: s_wait_bvhcnt 0x0 287; GFX12-NEXT: s_wait_kmcnt 0x0 288; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-2048 289; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 290; GFX12-NEXT: s_setpc_b64 s[30:31] 291 %gep = getelementptr i8, ptr %p, i64 -2048 292 %load = load i8, ptr %gep, align 4 293 ret i8 %load 294} 295 296define i8 @flat_inst_valu_offset_neg_12bit_max(ptr %p) { 297; GFX9-LABEL: flat_inst_valu_offset_neg_12bit_max: 298; GFX9: ; %bb.0: 299; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 300; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 301; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 302; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 303; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 304; GFX9-NEXT: s_setpc_b64 s[30:31] 305; 306; GFX10-LABEL: flat_inst_valu_offset_neg_12bit_max: 307; GFX10: ; %bb.0: 308; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 309; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 310; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 311; GFX10-NEXT: flat_load_ubyte v0, 
v[0:1] 312; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 313; GFX10-NEXT: s_setpc_b64 s[30:31] 314; 315; GFX11-LABEL: flat_inst_valu_offset_neg_12bit_max: 316; GFX11: ; %bb.0: 317; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 318; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 319; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 320; GFX11-NEXT: flat_load_u8 v0, v[0:1] 321; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 322; GFX11-NEXT: s_setpc_b64 s[30:31] 323; 324; GFX12-LABEL: flat_inst_valu_offset_neg_12bit_max: 325; GFX12: ; %bb.0: 326; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 327; GFX12-NEXT: s_wait_expcnt 0x0 328; GFX12-NEXT: s_wait_samplecnt 0x0 329; GFX12-NEXT: s_wait_bvhcnt 0x0 330; GFX12-NEXT: s_wait_kmcnt 0x0 331; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-4096 332; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 333; GFX12-NEXT: s_setpc_b64 s[30:31] 334 %gep = getelementptr i8, ptr %p, i64 -4096 335 %load = load i8, ptr %gep, align 4 336 ret i8 %load 337} 338 339define i8 @flat_inst_valu_offset_neg_13bit_max(ptr %p) { 340; GFX9-LABEL: flat_inst_valu_offset_neg_13bit_max: 341; GFX9: ; %bb.0: 342; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 343; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 344; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 345; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 346; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 347; GFX9-NEXT: s_setpc_b64 s[30:31] 348; 349; GFX10-LABEL: flat_inst_valu_offset_neg_13bit_max: 350; GFX10: ; %bb.0: 351; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 352; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 353; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 354; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 355; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 356; GFX10-NEXT: s_setpc_b64 s[30:31] 357; 358; GFX11-LABEL: flat_inst_valu_offset_neg_13bit_max: 359; GFX11: ; %bb.0: 360; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 361; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 
362; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 363; GFX11-NEXT: flat_load_u8 v0, v[0:1] 364; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 365; GFX11-NEXT: s_setpc_b64 s[30:31] 366; 367; GFX12-LABEL: flat_inst_valu_offset_neg_13bit_max: 368; GFX12: ; %bb.0: 369; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 370; GFX12-NEXT: s_wait_expcnt 0x0 371; GFX12-NEXT: s_wait_samplecnt 0x0 372; GFX12-NEXT: s_wait_bvhcnt 0x0 373; GFX12-NEXT: s_wait_kmcnt 0x0 374; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-8192 375; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 376; GFX12-NEXT: s_setpc_b64 s[30:31] 377 %gep = getelementptr i8, ptr %p, i64 -8192 378 %load = load i8, ptr %gep, align 4 379 ret i8 %load 380} 381 382define i8 @flat_inst_valu_offset_neg_24bit_max(ptr %p) { 383; GFX9-LABEL: flat_inst_valu_offset_neg_24bit_max: 384; GFX9: ; %bb.0: 385; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 386; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xff800000, v0 387; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 388; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 389; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 390; GFX9-NEXT: s_setpc_b64 s[30:31] 391; 392; GFX10-LABEL: flat_inst_valu_offset_neg_24bit_max: 393; GFX10: ; %bb.0: 394; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 395; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 396; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 397; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 398; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 399; GFX10-NEXT: s_setpc_b64 s[30:31] 400; 401; GFX11-LABEL: flat_inst_valu_offset_neg_24bit_max: 402; GFX11: ; %bb.0: 403; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 404; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 405; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 406; GFX11-NEXT: flat_load_u8 v0, v[0:1] 407; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 408; GFX11-NEXT: s_setpc_b64 s[30:31] 409; 410; GFX12-LABEL: flat_inst_valu_offset_neg_24bit_max: 411; GFX12: ; %bb.0: 412; GFX12-NEXT: 
s_wait_loadcnt_dscnt 0x0 413; GFX12-NEXT: s_wait_expcnt 0x0 414; GFX12-NEXT: s_wait_samplecnt 0x0 415; GFX12-NEXT: s_wait_bvhcnt 0x0 416; GFX12-NEXT: s_wait_kmcnt 0x0 417; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-8388608 418; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 419; GFX12-NEXT: s_setpc_b64 s[30:31] 420 %gep = getelementptr i8, ptr %p, i64 -8388608 421 %load = load i8, ptr %gep, align 4 422 ret i8 %load 423} 424 425 426define i8 @flat_inst_valu_offset_2x_11bit_max(ptr %p) { 427; GFX9-LABEL: flat_inst_valu_offset_2x_11bit_max: 428; GFX9: ; %bb.0: 429; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 430; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 431; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 432; GFX9-NEXT: s_setpc_b64 s[30:31] 433; 434; GFX10-LABEL: flat_inst_valu_offset_2x_11bit_max: 435; GFX10: ; %bb.0: 436; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 437; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 438; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 439; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 440; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 441; GFX10-NEXT: s_setpc_b64 s[30:31] 442; 443; GFX11-LABEL: flat_inst_valu_offset_2x_11bit_max: 444; GFX11: ; %bb.0: 445; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 446; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 447; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 448; GFX11-NEXT: s_setpc_b64 s[30:31] 449; 450; GFX12-LABEL: flat_inst_valu_offset_2x_11bit_max: 451; GFX12: ; %bb.0: 452; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 453; GFX12-NEXT: s_wait_expcnt 0x0 454; GFX12-NEXT: s_wait_samplecnt 0x0 455; GFX12-NEXT: s_wait_bvhcnt 0x0 456; GFX12-NEXT: s_wait_kmcnt 0x0 457; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:4095 458; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 459; GFX12-NEXT: s_setpc_b64 s[30:31] 460 %gep = getelementptr i8, ptr %p, i64 4095 461 %load = load i8, ptr %gep, align 4 462 ret i8 %load 463} 464 465define i8 @flat_inst_valu_offset_2x_12bit_max(ptr %p) { 466; GFX9-SDAG-LABEL: 
flat_inst_valu_offset_2x_12bit_max: 467; GFX9-SDAG: ; %bb.0: 468; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 469; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 470; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 471; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 472; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 473; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 474; 475; GFX10-LABEL: flat_inst_valu_offset_2x_12bit_max: 476; GFX10: ; %bb.0: 477; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 478; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 479; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 480; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 481; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 482; GFX10-NEXT: s_setpc_b64 s[30:31] 483; 484; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max: 485; GFX11-SDAG: ; %bb.0: 486; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 487; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 488; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 489; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 490; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 491; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 492; 493; GFX12-LABEL: flat_inst_valu_offset_2x_12bit_max: 494; GFX12: ; %bb.0: 495; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 496; GFX12-NEXT: s_wait_expcnt 0x0 497; GFX12-NEXT: s_wait_samplecnt 0x0 498; GFX12-NEXT: s_wait_bvhcnt 0x0 499; GFX12-NEXT: s_wait_kmcnt 0x0 500; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:8191 501; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 502; GFX12-NEXT: s_setpc_b64 s[30:31] 503; 504; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max: 505; GFX9-GISEL: ; %bb.0: 506; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 507; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 508; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 509; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 510; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 511; GFX9-GISEL-NEXT: 
s_setpc_b64 s[30:31] 512; 513; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max: 514; GFX11-GISEL: ; %bb.0: 515; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 516; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 517; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 518; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] 519; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 520; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 521 %gep = getelementptr i8, ptr %p, i64 8191 522 %load = load i8, ptr %gep, align 4 523 ret i8 %load 524} 525 526define i8 @flat_inst_valu_offset_2x_13bit_max(ptr %p) { 527; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max: 528; GFX9-SDAG: ; %bb.0: 529; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 530; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 531; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 532; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 533; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 534; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 535; 536; GFX10-LABEL: flat_inst_valu_offset_2x_13bit_max: 537; GFX10: ; %bb.0: 538; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 539; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0 540; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 541; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 542; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 543; GFX10-NEXT: s_setpc_b64 s[30:31] 544; 545; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max: 546; GFX11-SDAG: ; %bb.0: 547; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 548; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0 549; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 550; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 551; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 552; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 553; 554; GFX12-LABEL: flat_inst_valu_offset_2x_13bit_max: 555; GFX12: ; %bb.0: 556; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 557; GFX12-NEXT: s_wait_expcnt 
0x0 558; GFX12-NEXT: s_wait_samplecnt 0x0 559; GFX12-NEXT: s_wait_bvhcnt 0x0 560; GFX12-NEXT: s_wait_kmcnt 0x0 561; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:16383 562; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 563; GFX12-NEXT: s_setpc_b64 s[30:31] 564; 565; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max: 566; GFX9-GISEL: ; %bb.0: 567; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 568; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x3fff, v0 569; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 570; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 571; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 572; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 573; 574; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max: 575; GFX11-GISEL: ; %bb.0: 576; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 577; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0 578; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 579; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] 580; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 581; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 582 %gep = getelementptr i8, ptr %p, i64 16383 583 %load = load i8, ptr %gep, align 4 584 ret i8 %load 585} 586 587define i8 @flat_inst_valu_offset_2x_24bit_max(ptr %p) { 588; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_24bit_max: 589; GFX9-SDAG: ; %bb.0: 590; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 591; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff000, v0 592; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 593; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4094 594; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 595; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 596; 597; GFX10-LABEL: flat_inst_valu_offset_2x_24bit_max: 598; GFX10: ; %bb.0: 599; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 600; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffffe, v0 601; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 602; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 603; GFX10-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) 604; GFX10-NEXT: s_setpc_b64 s[30:31] 605; 606; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_24bit_max: 607; GFX11-SDAG: ; %bb.0: 608; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 609; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff000, v0 610; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 611; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4094 612; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 613; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 614; 615; GFX12-SDAG-LABEL: flat_inst_valu_offset_2x_24bit_max: 616; GFX12-SDAG: ; %bb.0: 617; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 618; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 619; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 620; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 621; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 622; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 623; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 624; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:8388606 625; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 626; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 627; 628; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_24bit_max: 629; GFX9-GISEL: ; %bb.0: 630; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 631; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffffe, v0 632; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 633; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 634; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 635; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 636; 637; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_24bit_max: 638; GFX11-GISEL: ; %bb.0: 639; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 640; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffffe, v0 641; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 642; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] 643; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 644; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 645; 646; GFX12-GISEL-LABEL: flat_inst_valu_offset_2x_24bit_max: 647; GFX12-GISEL: ; 
%bb.0: 648; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 649; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 650; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 651; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 652; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 653; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffffe, v0 654; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo 655; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 656; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 657; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 658 %gep = getelementptr i8, ptr %p, i64 16777214 659 %load = load i8, ptr %gep, align 4 660 ret i8 %load 661} 662 663define i8 @flat_inst_valu_offset_2x_neg_11bit_max(ptr %p) { 664; GFX9-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: 665; GFX9: ; %bb.0: 666; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 667; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 668; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 669; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 670; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 671; GFX9-NEXT: s_setpc_b64 s[30:31] 672; 673; GFX10-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: 674; GFX10: ; %bb.0: 675; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 676; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 677; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 678; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 679; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 680; GFX10-NEXT: s_setpc_b64 s[30:31] 681; 682; GFX11-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: 683; GFX11: ; %bb.0: 684; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 685; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0 686; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 687; GFX11-NEXT: flat_load_u8 v0, v[0:1] 688; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 689; GFX11-NEXT: s_setpc_b64 s[30:31] 690; 691; GFX12-LABEL: flat_inst_valu_offset_2x_neg_11bit_max: 692; GFX12: ; %bb.0: 693; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 694; GFX12-NEXT: s_wait_expcnt 0x0 695; GFX12-NEXT: 
s_wait_samplecnt 0x0 696; GFX12-NEXT: s_wait_bvhcnt 0x0 697; GFX12-NEXT: s_wait_kmcnt 0x0 698; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-4096 699; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 700; GFX12-NEXT: s_setpc_b64 s[30:31] 701 %gep = getelementptr i8, ptr %p, i64 -4096 702 %load = load i8, ptr %gep, align 4 703 ret i8 %load 704} 705 706define i8 @flat_inst_valu_offset_2x_neg_12bit_max(ptr %p) { 707; GFX9-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: 708; GFX9: ; %bb.0: 709; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 710; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 711; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 712; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 713; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 714; GFX9-NEXT: s_setpc_b64 s[30:31] 715; 716; GFX10-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: 717; GFX10: ; %bb.0: 718; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 719; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 720; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 721; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 722; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 723; GFX10-NEXT: s_setpc_b64 s[30:31] 724; 725; GFX11-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: 726; GFX11: ; %bb.0: 727; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 728; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0 729; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 730; GFX11-NEXT: flat_load_u8 v0, v[0:1] 731; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 732; GFX11-NEXT: s_setpc_b64 s[30:31] 733; 734; GFX12-LABEL: flat_inst_valu_offset_2x_neg_12bit_max: 735; GFX12: ; %bb.0: 736; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 737; GFX12-NEXT: s_wait_expcnt 0x0 738; GFX12-NEXT: s_wait_samplecnt 0x0 739; GFX12-NEXT: s_wait_bvhcnt 0x0 740; GFX12-NEXT: s_wait_kmcnt 0x0 741; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-8192 742; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 743; GFX12-NEXT: s_setpc_b64 s[30:31] 744 %gep = getelementptr i8, ptr %p, i64 -8192 
745 %load = load i8, ptr %gep, align 4 746 ret i8 %load 747} 748 749define i8 @flat_inst_valu_offset_2x_neg_13bit_max(ptr %p) { 750; GFX9-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: 751; GFX9: ; %bb.0: 752; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 753; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0 754; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 755; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 756; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 757; GFX9-NEXT: s_setpc_b64 s[30:31] 758; 759; GFX10-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: 760; GFX10: ; %bb.0: 761; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 762; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 763; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 764; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 765; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 766; GFX10-NEXT: s_setpc_b64 s[30:31] 767; 768; GFX11-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: 769; GFX11: ; %bb.0: 770; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 771; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0 772; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 773; GFX11-NEXT: flat_load_u8 v0, v[0:1] 774; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 775; GFX11-NEXT: s_setpc_b64 s[30:31] 776; 777; GFX12-LABEL: flat_inst_valu_offset_2x_neg_13bit_max: 778; GFX12: ; %bb.0: 779; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 780; GFX12-NEXT: s_wait_expcnt 0x0 781; GFX12-NEXT: s_wait_samplecnt 0x0 782; GFX12-NEXT: s_wait_bvhcnt 0x0 783; GFX12-NEXT: s_wait_kmcnt 0x0 784; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-16384 785; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 786; GFX12-NEXT: s_setpc_b64 s[30:31] 787 %gep = getelementptr i8, ptr %p, i64 -16384 788 %load = load i8, ptr %gep, align 4 789 ret i8 %load 790} 791 792define i8 @flat_inst_valu_offset_2x_neg_24bit_max(ptr %p) { 793; GFX9-LABEL: flat_inst_valu_offset_2x_neg_24bit_max: 794; GFX9: ; %bb.0: 795; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 796; 
GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xff000001, v0 797; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 798; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 799; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 800; GFX9-NEXT: s_setpc_b64 s[30:31] 801; 802; GFX10-LABEL: flat_inst_valu_offset_2x_neg_24bit_max: 803; GFX10: ; %bb.0: 804; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 805; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xff000001, v0 806; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 807; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 808; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 809; GFX10-NEXT: s_setpc_b64 s[30:31] 810; 811; GFX11-LABEL: flat_inst_valu_offset_2x_neg_24bit_max: 812; GFX11: ; %bb.0: 813; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 814; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xff000001, v0 815; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 816; GFX11-NEXT: flat_load_u8 v0, v[0:1] 817; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 818; GFX11-NEXT: s_setpc_b64 s[30:31] 819; 820; GFX12-SDAG-LABEL: flat_inst_valu_offset_2x_neg_24bit_max: 821; GFX12-SDAG: ; %bb.0: 822; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 823; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 824; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 825; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 826; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 827; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xff800000, v0 828; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 829; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:-8388607 830; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 831; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 832; 833; GFX12-GISEL-LABEL: flat_inst_valu_offset_2x_neg_24bit_max: 834; GFX12-GISEL: ; %bb.0: 835; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 836; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 837; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 838; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 839; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 840; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xff000001, v0 841; GFX12-GISEL-NEXT: 
v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo 842; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 843; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 844; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 845 %gep = getelementptr i8, ptr %p, i64 -16777215 846 %load = load i8, ptr %gep, align 4 847 ret i8 %load 848} 849 850; Fill 11-bit low-bits (1ull << 33) | 2047 851define i8 @flat_inst_valu_offset_64bit_11bit_split0(ptr %p) { 852; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0: 853; GFX9-SDAG: ; %bb.0: 854; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 855; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 856; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 857; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 858; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 859; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 860; 861; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split0: 862; GFX10: ; %bb.0: 863; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 864; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 865; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 866; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 867; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 868; GFX10-NEXT: s_setpc_b64 s[30:31] 869; 870; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0: 871; GFX11-SDAG: ; %bb.0: 872; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 873; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 874; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 875; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2047 876; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 877; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 878; 879; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0: 880; GFX12-SDAG: ; %bb.0: 881; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 882; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 883; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 884; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 885; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 886; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 
0, v0 887; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 888; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2047 889; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 890; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 891; 892; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0: 893; GFX9-GISEL: ; %bb.0: 894; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 895; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0 896; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 897; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 898; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 899; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 900; 901; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0: 902; GFX11-GISEL: ; %bb.0: 903; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 904; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 905; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 906; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] 907; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 908; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 909; 910; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0: 911; GFX12-GISEL: ; %bb.0: 912; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 913; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 914; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 915; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 916; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 917; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 918; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 919; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 920; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 921; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 922 %gep = getelementptr i8, ptr %p, i64 8589936639 923 %load = load i8, ptr %gep, align 4 924 ret i8 %load 925} 926 927; Fill 11-bit low-bits (1ull << 33) | 2048 928define i8 @flat_inst_valu_offset_64bit_11bit_split1(ptr %p) { 929; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1: 930; GFX9-SDAG: ; %bb.0: 931; GFX9-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 932; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 933; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 934; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 935; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 936; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 937; 938; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_split1: 939; GFX10: ; %bb.0: 940; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 941; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 942; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 943; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 944; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 945; GFX10-NEXT: s_setpc_b64 s[30:31] 946; 947; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1: 948; GFX11-SDAG: ; %bb.0: 949; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 950; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 951; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 952; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2048 953; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 954; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 955; 956; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1: 957; GFX12-SDAG: ; %bb.0: 958; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 959; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 960; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 961; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 962; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 963; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 964; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 965; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2048 966; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 967; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 968; 969; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1: 970; GFX9-GISEL: ; %bb.0: 971; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 972; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0 973; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 974; GFX9-GISEL-NEXT: 
flat_load_ubyte v0, v[0:1] 975; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 976; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 977; 978; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1: 979; GFX11-GISEL: ; %bb.0: 980; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 981; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 982; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 983; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] 984; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 985; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 986; 987; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1: 988; GFX12-GISEL: ; %bb.0: 989; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 990; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 991; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 992; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 993; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 994; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 995; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 996; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 997; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 998; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 999 %gep = getelementptr i8, ptr %p, i64 8589936640 1000 %load = load i8, ptr %gep, align 4 1001 ret i8 %load 1002} 1003 1004; Fill 12-bit low-bits (1ull << 33) | 4095 1005define i8 @flat_inst_valu_offset_64bit_12bit_split0(ptr %p) { 1006; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0: 1007; GFX9-SDAG: ; %bb.0: 1008; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1009; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0 1010; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1011; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 1012; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1013; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1014; 1015; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split0: 1016; GFX10: ; %bb.0: 1017; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1018; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1019; 
GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1020; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1021; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1022; GFX10-NEXT: s_setpc_b64 s[30:31] 1023; 1024; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0: 1025; GFX11-SDAG: ; %bb.0: 1026; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1027; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1028; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1029; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 1030; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1031; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1032; 1033; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0: 1034; GFX12-SDAG: ; %bb.0: 1035; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1036; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1037; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1038; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1039; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1040; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1041; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1042; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 1043; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1044; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1045; 1046; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0: 1047; GFX9-GISEL: ; %bb.0: 1048; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1049; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0 1050; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1051; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 1052; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1053; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1054; 1055; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0: 1056; GFX11-GISEL: ; %bb.0: 1057; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1058; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1059; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1060; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1061; 
GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1062; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1063; 1064; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0: 1065; GFX12-GISEL: ; %bb.0: 1066; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1067; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1068; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1069; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1070; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1071; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1072; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1073; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1074; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1075; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1076 %gep = getelementptr i8, ptr %p, i64 8589938687 1077 %load = load i8, ptr %gep, align 4 1078 ret i8 %load 1079} 1080 1081; Fill 12-bit low-bits (1ull << 33) | 4096 1082define i8 @flat_inst_valu_offset_64bit_12bit_split1(ptr %p) { 1083; GFX9-LABEL: flat_inst_valu_offset_64bit_12bit_split1: 1084; GFX9: ; %bb.0: 1085; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1086; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1087; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1088; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 1089; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1090; GFX9-NEXT: s_setpc_b64 s[30:31] 1091; 1092; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_split1: 1093; GFX10: ; %bb.0: 1094; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1095; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1096; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1097; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1098; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1099; GFX10-NEXT: s_setpc_b64 s[30:31] 1100; 1101; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_split1: 1102; GFX11: ; %bb.0: 1103; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1104; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1105; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1106; GFX11-NEXT: flat_load_u8 v0, v[0:1] 
1107; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1108; GFX11-NEXT: s_setpc_b64 s[30:31] 1109; 1110; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1: 1111; GFX12-SDAG: ; %bb.0: 1112; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1113; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1114; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1115; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1116; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1117; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1118; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1119; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4096 1120; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1121; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1122; 1123; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1: 1124; GFX12-GISEL: ; %bb.0: 1125; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1126; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1127; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1128; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1129; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1130; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1131; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1132; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1133; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1134; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1135 %gep = getelementptr i8, ptr %p, i64 8589938688 1136 %load = load i8, ptr %gep, align 4 1137 ret i8 %load 1138} 1139 1140; Fill 13-bit low-bits (1ull << 33) | 8191 1141define i8 @flat_inst_valu_offset_64bit_13bit_split0(ptr %p) { 1142; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0: 1143; GFX9-SDAG: ; %bb.0: 1144; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1145; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1146; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1147; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 1148; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1149; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1150; 1151; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split0: 
1152; GFX10: ; %bb.0: 1153; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1154; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 1155; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1156; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1157; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1158; GFX10-NEXT: s_setpc_b64 s[30:31] 1159; 1160; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0: 1161; GFX11-SDAG: ; %bb.0: 1162; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1163; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1164; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1165; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 1166; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1167; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 1168; 1169; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0: 1170; GFX12-SDAG: ; %bb.0: 1171; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1172; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1173; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1174; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1175; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1176; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1177; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1178; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:8191 1179; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1180; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1181; 1182; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0: 1183; GFX9-GISEL: ; %bb.0: 1184; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1185; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 1186; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1187; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 1188; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1189; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1190; 1191; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0: 1192; GFX11-GISEL: ; %bb.0: 1193; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1194; GFX11-GISEL-NEXT: v_add_co_u32 v0, 
vcc_lo, 0x1fff, v0 1195; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1196; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1197; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1198; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] 1199; 1200; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0: 1201; GFX12-GISEL: ; %bb.0: 1202; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1203; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1204; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1205; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1206; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1207; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 1208; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1209; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1210; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1211; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1212 %gep = getelementptr i8, ptr %p, i64 8589942783 1213 %load = load i8, ptr %gep, align 4 1214 ret i8 %load 1215} 1216 1217; Fill 13-bit low-bits (1ull << 33) | 8192 1218define i8 @flat_inst_valu_offset_64bit_13bit_split1(ptr %p) { 1219; GFX9-LABEL: flat_inst_valu_offset_64bit_13bit_split1: 1220; GFX9: ; %bb.0: 1221; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1222; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1223; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 1224; GFX9-NEXT: flat_load_ubyte v0, v[0:1] 1225; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1226; GFX9-NEXT: s_setpc_b64 s[30:31] 1227; 1228; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_split1: 1229; GFX10: ; %bb.0: 1230; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1231; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1232; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1233; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1234; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1235; GFX10-NEXT: s_setpc_b64 s[30:31] 1236; 1237; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_split1: 1238; GFX11: ; %bb.0: 1239; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1240; 
GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1241; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1242; GFX11-NEXT: flat_load_u8 v0, v[0:1] 1243; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1244; GFX11-NEXT: s_setpc_b64 s[30:31] 1245; 1246; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1: 1247; GFX12-SDAG: ; %bb.0: 1248; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1249; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1250; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1251; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1252; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1253; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0 1254; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1255; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:8192 1256; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1257; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1258; 1259; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1: 1260; GFX12-GISEL: ; %bb.0: 1261; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1262; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1263; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1264; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1265; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1266; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1267; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo 1268; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1269; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1270; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1271 %gep = getelementptr i8, ptr %p, i64 8589942784 1272 %load = load i8, ptr %gep, align 4 1273 ret i8 %load 1274} 1275 1276; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047 1277define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(ptr %p) { 1278; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: 1279; GFX9-SDAG: ; %bb.0: 1280; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1281; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0 1282; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1283; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 
v2, v1, vcc 1284; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] 1285; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1286; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1287; 1288; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: 1289; GFX10: ; %bb.0: 1290; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1291; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 1292; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1293; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1294; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1295; GFX10-NEXT: s_setpc_b64 s[30:31] 1296; 1297; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: 1298; GFX11: ; %bb.0: 1299; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1300; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 1301; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1302; GFX11-NEXT: flat_load_u8 v0, v[0:1] 1303; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1304; GFX11-NEXT: s_setpc_b64 s[30:31] 1305; 1306; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: 1307; GFX12-SDAG: ; %bb.0: 1308; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1309; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1310; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1311; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1312; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1313; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1314; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1315; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:-8386561 1316; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1317; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1318; 1319; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: 1320; GFX9-GISEL: ; %bb.0: 1321; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1322; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1323; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0 1324; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1325; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 1326; 
GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1327; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1328; 1329; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0: 1330; GFX12-GISEL: ; %bb.0: 1331; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1332; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1333; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1334; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1335; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1336; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0 1337; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1338; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1339; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1340; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1341 %gep = getelementptr i8, ptr %p, i64 -9223372036854773761 1342 %load = load i8, ptr %gep, align 4 1343 ret i8 %load 1344} 1345 1346; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048 1347define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(ptr %p) { 1348; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: 1349; GFX9-SDAG: ; %bb.0: 1350; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1351; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0 1352; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1353; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 1354; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] 1355; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1356; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1357; 1358; GFX10-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: 1359; GFX10: ; %bb.0: 1360; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1361; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 1362; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1363; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1364; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1365; GFX10-NEXT: s_setpc_b64 s[30:31] 1366; 1367; GFX11-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: 1368; GFX11: ; %bb.0: 1369; GFX11-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 1370; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 1371; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1372; GFX11-NEXT: flat_load_u8 v0, v[0:1] 1373; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1374; GFX11-NEXT: s_setpc_b64 s[30:31] 1375; 1376; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: 1377; GFX12-SDAG: ; %bb.0: 1378; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1379; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1380; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1381; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1382; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1383; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1384; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1385; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:-8386560 1386; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1387; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1388; 1389; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: 1390; GFX9-GISEL: ; %bb.0: 1391; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1392; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1393; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0 1394; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1395; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 1396; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1397; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1398; 1399; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1: 1400; GFX12-GISEL: ; %bb.0: 1401; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1402; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1403; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1404; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1405; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1406; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0 1407; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1408; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1409; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1410; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1411 %gep = 
getelementptr i8, ptr %p, i64 -9223372036854773760 1412 %load = load i8, ptr %gep, align 4 1413 ret i8 %load 1414} 1415 1416; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095 1417define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(ptr %p) { 1418; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: 1419; GFX9-SDAG: ; %bb.0: 1420; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1421; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0 1422; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1423; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 1424; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] 1425; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1426; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1427; 1428; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: 1429; GFX10: ; %bb.0: 1430; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1431; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1432; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1433; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1434; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1435; GFX10-NEXT: s_setpc_b64 s[30:31] 1436; 1437; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: 1438; GFX11: ; %bb.0: 1439; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1440; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1441; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1442; GFX11-NEXT: flat_load_u8 v0, v[0:1] 1443; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1444; GFX11-NEXT: s_setpc_b64 s[30:31] 1445; 1446; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: 1447; GFX12-SDAG: ; %bb.0: 1448; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1449; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1450; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1451; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1452; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1453; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1454; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 
0x80000000, v1, vcc_lo 1455; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:-8384513 1456; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1457; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1458; 1459; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: 1460; GFX9-GISEL: ; %bb.0: 1461; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1462; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1463; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0 1464; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1465; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 1466; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1467; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1468; 1469; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0: 1470; GFX12-GISEL: ; %bb.0: 1471; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1472; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1473; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1474; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1475; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1476; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0 1477; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1478; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1479; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1480; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1481 %gep = getelementptr i8, ptr %p, i64 -9223372036854771713 1482 %load = load i8, ptr %gep, align 4 1483 ret i8 %load 1484} 1485 1486; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096 1487define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(ptr %p) { 1488; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: 1489; GFX9-SDAG: ; %bb.0: 1490; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1491; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1492; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1493; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 1494; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] 1495; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1496; GFX9-SDAG-NEXT: 
s_setpc_b64 s[30:31] 1497; 1498; GFX10-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: 1499; GFX10: ; %bb.0: 1500; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1501; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1502; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1503; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1504; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1505; GFX10-NEXT: s_setpc_b64 s[30:31] 1506; 1507; GFX11-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: 1508; GFX11: ; %bb.0: 1509; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1510; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1511; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1512; GFX11-NEXT: flat_load_u8 v0, v[0:1] 1513; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1514; GFX11-NEXT: s_setpc_b64 s[30:31] 1515; 1516; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: 1517; GFX12-SDAG: ; %bb.0: 1518; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1519; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1520; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1521; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1522; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1523; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1524; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1525; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:-8384512 1526; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1527; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1528; 1529; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1: 1530; GFX9-GISEL: ; %bb.0: 1531; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1532; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1533; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1534; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1535; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 1536; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1537; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1538; 1539; GFX12-GISEL-LABEL: 
flat_inst_valu_offset_64bit_12bit_neg_high_split1: 1540; GFX12-GISEL: ; %bb.0: 1541; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1542; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1543; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1544; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1545; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1546; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0 1547; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1548; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1549; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1550; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1551 %gep = getelementptr i8, ptr %p, i64 -9223372036854771712 1552 %load = load i8, ptr %gep, align 4 1553 ret i8 %load 1554} 1555 1556; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191 1557define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(ptr %p) { 1558; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: 1559; GFX9-SDAG: ; %bb.0: 1560; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1561; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 1562; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1563; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 1564; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] 1565; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1566; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1567; 1568; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: 1569; GFX10: ; %bb.0: 1570; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1571; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 1572; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1573; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1574; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1575; GFX10-NEXT: s_setpc_b64 s[30:31] 1576; 1577; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: 1578; GFX11: ; %bb.0: 1579; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1580; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 1581; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 
0x80000000, v1, vcc_lo 1582; GFX11-NEXT: flat_load_u8 v0, v[0:1] 1583; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1584; GFX11-NEXT: s_setpc_b64 s[30:31] 1585; 1586; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: 1587; GFX12-SDAG: ; %bb.0: 1588; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1589; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1590; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1591; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1592; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1593; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1594; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1595; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:-8380417 1596; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1597; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1598; 1599; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: 1600; GFX9-GISEL: ; %bb.0: 1601; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1602; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1603; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0 1604; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1605; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 1606; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1607; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1608; 1609; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0: 1610; GFX12-GISEL: ; %bb.0: 1611; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1612; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1613; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1614; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1615; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1616; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0 1617; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1618; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1619; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1620; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1621 %gep = getelementptr i8, ptr %p, i64 -9223372036854767617 1622 %load = load i8, ptr %gep, align 4 1623 ret i8 %load 1624} 1625 
1626; One past the 13-bit low-bits maximum, negative high bits (1ull << 63) | 8192 1627define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(ptr %p) { 1628; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: 1629; GFX9-SDAG: ; %bb.0: 1630; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1631; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1632; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1 1633; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc 1634; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] 1635; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1636; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] 1637; 1638; GFX10-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: 1639; GFX10: ; %bb.0: 1640; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1641; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1642; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1643; GFX10-NEXT: flat_load_ubyte v0, v[0:1] 1644; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1645; GFX10-NEXT: s_setpc_b64 s[30:31] 1646; 1647; GFX11-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: 1648; GFX11: ; %bb.0: 1649; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1650; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1651; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1652; GFX11-NEXT: flat_load_u8 v0, v[0:1] 1653; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1654; GFX11-NEXT: s_setpc_b64 s[30:31] 1655; 1656; GFX12-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: 1657; GFX12-SDAG: ; %bb.0: 1658; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 1659; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 1660; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 1661; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 1662; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 1663; GFX12-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800000, v0 1664; GFX12-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1665; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:-8380416 1666; GFX12-SDAG-NEXT: 
s_wait_loadcnt_dscnt 0x0 1667; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 1668; 1669; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: 1670; GFX9-GISEL: ; %bb.0: 1671; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1672; GFX9-GISEL-NEXT: v_bfrev_b32_e32 v2, 1 1673; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0 1674; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc 1675; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] 1676; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1677; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] 1678; 1679; GFX12-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1: 1680; GFX12-GISEL: ; %bb.0: 1681; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1682; GFX12-GISEL-NEXT: s_wait_expcnt 0x0 1683; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0 1684; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0 1685; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 1686; GFX12-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0 1687; GFX12-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo 1688; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] 1689; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 1690; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31] 1691 %gep = getelementptr i8, ptr %p, i64 -9223372036854767616 1692 %load = load i8, ptr %gep, align 4 1693 ret i8 %load 1694} 1695 1696define amdgpu_kernel void @flat_inst_salu_offset_1(ptr %p) { 1697; GFX9-LABEL: flat_inst_salu_offset_1: 1698; GFX9: ; %bb.0: 1699; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1700; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1701; GFX9-NEXT: v_mov_b32_e32 v0, s0 1702; GFX9-NEXT: v_mov_b32_e32 v1, s1 1703; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:1 glc 1704; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1705; GFX9-NEXT: flat_store_byte v[0:1], v0 1706; GFX9-NEXT: s_endpgm 1707; 1708; GFX10-LABEL: flat_inst_salu_offset_1: 1709; GFX10: ; %bb.0: 1710; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1711; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1712; GFX10-NEXT: s_add_u32 s0, s0, 1 1713; GFX10-NEXT: s_addc_u32 
s1, s1, 0 1714; GFX10-NEXT: v_mov_b32_e32 v0, s0 1715; GFX10-NEXT: v_mov_b32_e32 v1, s1 1716; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1717; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1718; GFX10-NEXT: flat_store_byte v[0:1], v0 1719; GFX10-NEXT: s_endpgm 1720; 1721; GFX11-LABEL: flat_inst_salu_offset_1: 1722; GFX11: ; %bb.0: 1723; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1724; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1725; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1726; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:1 glc dlc 1727; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1728; GFX11-NEXT: flat_store_b8 v[0:1], v0 1729; GFX11-NEXT: s_endpgm 1730; 1731; GFX12-LABEL: flat_inst_salu_offset_1: 1732; GFX12: ; %bb.0: 1733; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1734; GFX12-NEXT: s_wait_kmcnt 0x0 1735; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1736; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:1 scope:SCOPE_SYS 1737; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1738; GFX12-NEXT: flat_store_b8 v[0:1], v0 1739; GFX12-NEXT: s_endpgm 1740 %gep = getelementptr i8, ptr %p, i64 1 1741 %load = load volatile i8, ptr %gep, align 1 1742 store i8 %load, ptr undef 1743 ret void 1744} 1745 1746define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(ptr %p) { 1747; GFX9-LABEL: flat_inst_salu_offset_11bit_max: 1748; GFX9: ; %bb.0: 1749; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1750; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1751; GFX9-NEXT: v_mov_b32_e32 v0, s0 1752; GFX9-NEXT: v_mov_b32_e32 v1, s1 1753; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 glc 1754; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1755; GFX9-NEXT: flat_store_byte v[0:1], v0 1756; GFX9-NEXT: s_endpgm 1757; 1758; GFX10-LABEL: flat_inst_salu_offset_11bit_max: 1759; GFX10: ; %bb.0: 1760; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1761; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1762; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff 1763; GFX10-NEXT: s_addc_u32 s1, s1, 0 1764; GFX10-NEXT: v_mov_b32_e32 v0, s0 
1765; GFX10-NEXT: v_mov_b32_e32 v1, s1 1766; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1767; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1768; GFX10-NEXT: flat_store_byte v[0:1], v0 1769; GFX10-NEXT: s_endpgm 1770; 1771; GFX11-LABEL: flat_inst_salu_offset_11bit_max: 1772; GFX11: ; %bb.0: 1773; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1774; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1775; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1776; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc 1777; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1778; GFX11-NEXT: flat_store_b8 v[0:1], v0 1779; GFX11-NEXT: s_endpgm 1780; 1781; GFX12-LABEL: flat_inst_salu_offset_11bit_max: 1782; GFX12: ; %bb.0: 1783; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1784; GFX12-NEXT: s_wait_kmcnt 0x0 1785; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1786; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:2047 scope:SCOPE_SYS 1787; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1788; GFX12-NEXT: flat_store_b8 v[0:1], v0 1789; GFX12-NEXT: s_endpgm 1790 %gep = getelementptr i8, ptr %p, i64 2047 1791 %load = load volatile i8, ptr %gep, align 1 1792 store i8 %load, ptr undef 1793 ret void 1794} 1795 1796define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(ptr %p) { 1797; GFX9-LABEL: flat_inst_salu_offset_12bit_max: 1798; GFX9: ; %bb.0: 1799; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1800; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1801; GFX9-NEXT: v_mov_b32_e32 v0, s0 1802; GFX9-NEXT: v_mov_b32_e32 v1, s1 1803; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 1804; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1805; GFX9-NEXT: flat_store_byte v[0:1], v0 1806; GFX9-NEXT: s_endpgm 1807; 1808; GFX10-LABEL: flat_inst_salu_offset_12bit_max: 1809; GFX10: ; %bb.0: 1810; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1811; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1812; GFX10-NEXT: s_add_u32 s0, s0, 0xfff 1813; GFX10-NEXT: s_addc_u32 s1, s1, 0 1814; GFX10-NEXT: v_mov_b32_e32 v0, s0 1815; GFX10-NEXT: 
v_mov_b32_e32 v1, s1 1816; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1817; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1818; GFX10-NEXT: flat_store_byte v[0:1], v0 1819; GFX10-NEXT: s_endpgm 1820; 1821; GFX11-LABEL: flat_inst_salu_offset_12bit_max: 1822; GFX11: ; %bb.0: 1823; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1824; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1825; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1826; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 1827; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1828; GFX11-NEXT: flat_store_b8 v[0:1], v0 1829; GFX11-NEXT: s_endpgm 1830; 1831; GFX12-LABEL: flat_inst_salu_offset_12bit_max: 1832; GFX12: ; %bb.0: 1833; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1834; GFX12-NEXT: s_wait_kmcnt 0x0 1835; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1836; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:4095 scope:SCOPE_SYS 1837; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1838; GFX12-NEXT: flat_store_b8 v[0:1], v0 1839; GFX12-NEXT: s_endpgm 1840 %gep = getelementptr i8, ptr %p, i64 4095 1841 %load = load volatile i8, ptr %gep, align 1 1842 store i8 %load, ptr undef 1843 ret void 1844} 1845 1846define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(ptr %p) { 1847; GFX9-SDAG-LABEL: flat_inst_salu_offset_13bit_max: 1848; GFX9-SDAG: ; %bb.0: 1849; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1850; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1851; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0 1852; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 1853; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 1854; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 1855; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 1856; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1857; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 1858; GFX9-SDAG-NEXT: s_endpgm 1859; 1860; GFX10-LABEL: flat_inst_salu_offset_13bit_max: 1861; GFX10: ; %bb.0: 1862; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1863; GFX10-NEXT: s_waitcnt 
lgkmcnt(0) 1864; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff 1865; GFX10-NEXT: s_addc_u32 s1, s1, 0 1866; GFX10-NEXT: v_mov_b32_e32 v0, s0 1867; GFX10-NEXT: v_mov_b32_e32 v1, s1 1868; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1869; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1870; GFX10-NEXT: flat_store_byte v[0:1], v0 1871; GFX10-NEXT: s_endpgm 1872; 1873; GFX11-SDAG-LABEL: flat_inst_salu_offset_13bit_max: 1874; GFX11-SDAG: ; %bb.0: 1875; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1876; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1877; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 1878; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1879; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 1880; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 1881; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1882; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 1883; GFX11-SDAG-NEXT: s_endpgm 1884; 1885; GFX12-LABEL: flat_inst_salu_offset_13bit_max: 1886; GFX12: ; %bb.0: 1887; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1888; GFX12-NEXT: s_wait_kmcnt 0x0 1889; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1890; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:8191 scope:SCOPE_SYS 1891; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1892; GFX12-NEXT: flat_store_b8 v[0:1], v0 1893; GFX12-NEXT: s_endpgm 1894; 1895; GFX9-GISEL-LABEL: flat_inst_salu_offset_13bit_max: 1896; GFX9-GISEL: ; %bb.0: 1897; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1898; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1899; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff 1900; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0 1901; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 1902; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 1903; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 1904; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1905; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 1906; GFX9-GISEL-NEXT: s_endpgm 1907; 1908; GFX11-GISEL-LABEL: flat_inst_salu_offset_13bit_max: 1909; GFX11-GISEL: ; %bb.0: 1910; GFX11-GISEL-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x24 1911; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1912; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff 1913; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0 1914; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1915; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1916; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1917; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1918; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 1919; GFX11-GISEL-NEXT: s_endpgm 1920 %gep = getelementptr i8, ptr %p, i64 8191 1921 %load = load volatile i8, ptr %gep, align 1 1922 store i8 %load, ptr undef 1923 ret void 1924} 1925 1926define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(ptr %p) { 1927; GFX9-SDAG-LABEL: flat_inst_salu_offset_neg_11bit_max: 1928; GFX9-SDAG: ; %bb.0: 1929; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1930; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1931; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0 1932; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 1933; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0 1934; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 1935; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc 1936; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1937; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 1938; GFX9-SDAG-NEXT: s_endpgm 1939; 1940; GFX10-LABEL: flat_inst_salu_offset_neg_11bit_max: 1941; GFX10: ; %bb.0: 1942; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1943; GFX10-NEXT: s_waitcnt lgkmcnt(0) 1944; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff800 1945; GFX10-NEXT: s_addc_u32 s1, s1, -1 1946; GFX10-NEXT: v_mov_b32_e32 v0, s0 1947; GFX10-NEXT: v_mov_b32_e32 v1, s1 1948; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 1949; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1950; GFX10-NEXT: flat_store_byte v[0:1], v0 1951; GFX10-NEXT: s_endpgm 1952; 1953; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_11bit_max: 1954; GFX11-SDAG: ; %bb.0: 1955; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1956; GFX11-SDAG-NEXT: s_waitcnt 
lgkmcnt(0) 1957; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff800, s0 1958; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1959; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 1960; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1961; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1962; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 1963; GFX11-SDAG-NEXT: s_endpgm 1964; 1965; GFX12-LABEL: flat_inst_salu_offset_neg_11bit_max: 1966; GFX12: ; %bb.0: 1967; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1968; GFX12-NEXT: s_wait_kmcnt 0x0 1969; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1970; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-2048 scope:SCOPE_SYS 1971; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1972; GFX12-NEXT: flat_store_b8 v[0:1], v0 1973; GFX12-NEXT: s_endpgm 1974; 1975; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_11bit_max: 1976; GFX9-GISEL: ; %bb.0: 1977; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1978; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1979; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff800 1980; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1 1981; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 1982; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 1983; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 1984; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1985; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 1986; GFX9-GISEL-NEXT: s_endpgm 1987; 1988; GFX11-GISEL-LABEL: flat_inst_salu_offset_neg_11bit_max: 1989; GFX11-GISEL: ; %bb.0: 1990; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1991; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1992; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff800 1993; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1 1994; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1995; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1996; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 1997; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1998; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 1999; GFX11-GISEL-NEXT: s_endpgm 2000 %gep = 
getelementptr i8, ptr %p, i64 -2048 2001 %load = load volatile i8, ptr %gep, align 1 2002 store i8 %load, ptr undef 2003 ret void 2004} 2005 2006define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(ptr %p) { 2007; GFX9-SDAG-LABEL: flat_inst_salu_offset_neg_12bit_max: 2008; GFX9-SDAG: ; %bb.0: 2009; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2010; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2011; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0 2012; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 2013; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 2014; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 2015; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc 2016; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2017; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 2018; GFX9-SDAG-NEXT: s_endpgm 2019; 2020; GFX10-LABEL: flat_inst_salu_offset_neg_12bit_max: 2021; GFX10: ; %bb.0: 2022; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2023; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2024; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000 2025; GFX10-NEXT: s_addc_u32 s1, s1, -1 2026; GFX10-NEXT: v_mov_b32_e32 v0, s0 2027; GFX10-NEXT: v_mov_b32_e32 v1, s1 2028; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 2029; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2030; GFX10-NEXT: flat_store_byte v[0:1], v0 2031; GFX10-NEXT: s_endpgm 2032; 2033; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_12bit_max: 2034; GFX11-SDAG: ; %bb.0: 2035; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2036; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2037; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 2038; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2039; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 2040; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2041; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2042; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 2043; GFX11-SDAG-NEXT: s_endpgm 2044; 2045; GFX12-LABEL: flat_inst_salu_offset_neg_12bit_max: 2046; GFX12: ; %bb.0: 2047; GFX12-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x24 2048; GFX12-NEXT: s_wait_kmcnt 0x0 2049; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2050; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-4096 scope:SCOPE_SYS 2051; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2052; GFX12-NEXT: flat_store_b8 v[0:1], v0 2053; GFX12-NEXT: s_endpgm 2054; 2055; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_12bit_max: 2056; GFX9-GISEL: ; %bb.0: 2057; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2058; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2059; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000 2060; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1 2061; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 2062; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 2063; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 2064; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2065; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 2066; GFX9-GISEL-NEXT: s_endpgm 2067; 2068; GFX11-GISEL-LABEL: flat_inst_salu_offset_neg_12bit_max: 2069; GFX11-GISEL: ; %bb.0: 2070; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2071; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2072; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000 2073; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1 2074; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2075; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2076; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2077; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2078; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 2079; GFX11-GISEL-NEXT: s_endpgm 2080 %gep = getelementptr i8, ptr %p, i64 -4096 2081 %load = load volatile i8, ptr %gep, align 1 2082 store i8 %load, ptr undef 2083 ret void 2084} 2085 2086define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(ptr %p) { 2087; GFX9-SDAG-LABEL: flat_inst_salu_offset_neg_13bit_max: 2088; GFX9-SDAG: ; %bb.0: 2089; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2090; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2091; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0 2092; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 2093; 
GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 2094; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 2095; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc 2096; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2097; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 2098; GFX9-SDAG-NEXT: s_endpgm 2099; 2100; GFX10-LABEL: flat_inst_salu_offset_neg_13bit_max: 2101; GFX10: ; %bb.0: 2102; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2103; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2104; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000 2105; GFX10-NEXT: s_addc_u32 s1, s1, -1 2106; GFX10-NEXT: v_mov_b32_e32 v0, s0 2107; GFX10-NEXT: v_mov_b32_e32 v1, s1 2108; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 2109; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2110; GFX10-NEXT: flat_store_byte v[0:1], v0 2111; GFX10-NEXT: s_endpgm 2112; 2113; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_13bit_max: 2114; GFX11-SDAG: ; %bb.0: 2115; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2116; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2117; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 2118; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2119; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 2120; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2121; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2122; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 2123; GFX11-SDAG-NEXT: s_endpgm 2124; 2125; GFX12-LABEL: flat_inst_salu_offset_neg_13bit_max: 2126; GFX12: ; %bb.0: 2127; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2128; GFX12-NEXT: s_wait_kmcnt 0x0 2129; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2130; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-8192 scope:SCOPE_SYS 2131; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2132; GFX12-NEXT: flat_store_b8 v[0:1], v0 2133; GFX12-NEXT: s_endpgm 2134; 2135; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_13bit_max: 2136; GFX9-GISEL: ; %bb.0: 2137; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2138; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2139; 
GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000 2140; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1 2141; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 2142; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 2143; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 2144; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2145; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 2146; GFX9-GISEL-NEXT: s_endpgm 2147; 2148; GFX11-GISEL-LABEL: flat_inst_salu_offset_neg_13bit_max: 2149; GFX11-GISEL: ; %bb.0: 2150; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2151; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2152; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000 2153; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1 2154; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2155; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2156; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2157; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2158; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 2159; GFX11-GISEL-NEXT: s_endpgm 2160 %gep = getelementptr i8, ptr %p, i64 -8192 2161 %load = load volatile i8, ptr %gep, align 1 2162 store i8 %load, ptr undef 2163 ret void 2164} 2165 2166define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(ptr %p) { 2167; GFX9-LABEL: flat_inst_salu_offset_2x_11bit_max: 2168; GFX9: ; %bb.0: 2169; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2170; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2171; GFX9-NEXT: v_mov_b32_e32 v0, s0 2172; GFX9-NEXT: v_mov_b32_e32 v1, s1 2173; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 2174; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2175; GFX9-NEXT: flat_store_byte v[0:1], v0 2176; GFX9-NEXT: s_endpgm 2177; 2178; GFX10-LABEL: flat_inst_salu_offset_2x_11bit_max: 2179; GFX10: ; %bb.0: 2180; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2181; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2182; GFX10-NEXT: s_add_u32 s0, s0, 0xfff 2183; GFX10-NEXT: s_addc_u32 s1, s1, 0 2184; GFX10-NEXT: v_mov_b32_e32 v0, s0 2185; GFX10-NEXT: v_mov_b32_e32 v1, s1 2186; GFX10-NEXT: 
flat_load_ubyte v0, v[0:1] glc dlc 2187; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2188; GFX10-NEXT: flat_store_byte v[0:1], v0 2189; GFX10-NEXT: s_endpgm 2190; 2191; GFX11-LABEL: flat_inst_salu_offset_2x_11bit_max: 2192; GFX11: ; %bb.0: 2193; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2194; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2195; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2196; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 2197; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2198; GFX11-NEXT: flat_store_b8 v[0:1], v0 2199; GFX11-NEXT: s_endpgm 2200; 2201; GFX12-LABEL: flat_inst_salu_offset_2x_11bit_max: 2202; GFX12: ; %bb.0: 2203; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2204; GFX12-NEXT: s_wait_kmcnt 0x0 2205; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2206; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:4095 scope:SCOPE_SYS 2207; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2208; GFX12-NEXT: flat_store_b8 v[0:1], v0 2209; GFX12-NEXT: s_endpgm 2210 %gep = getelementptr i8, ptr %p, i64 4095 2211 %load = load volatile i8, ptr %gep, align 1 2212 store i8 %load, ptr undef 2213 ret void 2214} 2215 2216define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(ptr %p) { 2217; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_12bit_max: 2218; GFX9-SDAG: ; %bb.0: 2219; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2220; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2221; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0 2222; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 2223; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 2224; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 2225; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 2226; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2227; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 2228; GFX9-SDAG-NEXT: s_endpgm 2229; 2230; GFX10-LABEL: flat_inst_salu_offset_2x_12bit_max: 2231; GFX10: ; %bb.0: 2232; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2233; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2234; GFX10-NEXT: 
s_add_u32 s0, s0, 0x1fff 2235; GFX10-NEXT: s_addc_u32 s1, s1, 0 2236; GFX10-NEXT: v_mov_b32_e32 v0, s0 2237; GFX10-NEXT: v_mov_b32_e32 v1, s1 2238; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 2239; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2240; GFX10-NEXT: flat_store_byte v[0:1], v0 2241; GFX10-NEXT: s_endpgm 2242; 2243; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_12bit_max: 2244; GFX11-SDAG: ; %bb.0: 2245; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2246; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2247; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 2248; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2249; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 2250; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 2251; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2252; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 2253; GFX11-SDAG-NEXT: s_endpgm 2254; 2255; GFX12-LABEL: flat_inst_salu_offset_2x_12bit_max: 2256; GFX12: ; %bb.0: 2257; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2258; GFX12-NEXT: s_wait_kmcnt 0x0 2259; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2260; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:8191 scope:SCOPE_SYS 2261; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2262; GFX12-NEXT: flat_store_b8 v[0:1], v0 2263; GFX12-NEXT: s_endpgm 2264; 2265; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_12bit_max: 2266; GFX9-GISEL: ; %bb.0: 2267; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2268; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2269; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff 2270; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0 2271; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 2272; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 2273; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 2274; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2275; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 2276; GFX9-GISEL-NEXT: s_endpgm 2277; 2278; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_12bit_max: 2279; GFX11-GISEL: ; %bb.0: 2280; GFX11-GISEL-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x24 2281; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2282; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff 2283; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0 2284; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2285; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2286; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2287; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2288; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 2289; GFX11-GISEL-NEXT: s_endpgm 2290 %gep = getelementptr i8, ptr %p, i64 8191 2291 %load = load volatile i8, ptr %gep, align 1 2292 store i8 %load, ptr undef 2293 ret void 2294} 2295 2296define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(ptr %p) { 2297; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_13bit_max: 2298; GFX9-SDAG: ; %bb.0: 2299; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2300; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2301; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0 2302; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 2303; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0 2304; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 2305; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 2306; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2307; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 2308; GFX9-SDAG-NEXT: s_endpgm 2309; 2310; GFX10-LABEL: flat_inst_salu_offset_2x_13bit_max: 2311; GFX10: ; %bb.0: 2312; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2313; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2314; GFX10-NEXT: s_add_u32 s0, s0, 0x3fff 2315; GFX10-NEXT: s_addc_u32 s1, s1, 0 2316; GFX10-NEXT: v_mov_b32_e32 v0, s0 2317; GFX10-NEXT: v_mov_b32_e32 v1, s1 2318; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 2319; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2320; GFX10-NEXT: flat_store_byte v[0:1], v0 2321; GFX10-NEXT: s_endpgm 2322; 2323; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_13bit_max: 2324; GFX11-SDAG: ; %bb.0: 2325; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2326; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2327; 
GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x3000, s0 2328; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2329; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0 2330; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 2331; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2332; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 2333; GFX11-SDAG-NEXT: s_endpgm 2334; 2335; GFX12-LABEL: flat_inst_salu_offset_2x_13bit_max: 2336; GFX12: ; %bb.0: 2337; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2338; GFX12-NEXT: s_wait_kmcnt 0x0 2339; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2340; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:16383 scope:SCOPE_SYS 2341; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2342; GFX12-NEXT: flat_store_b8 v[0:1], v0 2343; GFX12-NEXT: s_endpgm 2344; 2345; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_13bit_max: 2346; GFX9-GISEL: ; %bb.0: 2347; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2348; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2349; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x3fff 2350; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0 2351; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 2352; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 2353; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 2354; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2355; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 2356; GFX9-GISEL-NEXT: s_endpgm 2357; 2358; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_13bit_max: 2359; GFX11-GISEL: ; %bb.0: 2360; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2361; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2362; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x3fff 2363; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0 2364; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2365; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2366; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2367; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2368; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 2369; GFX11-GISEL-NEXT: s_endpgm 2370 %gep = getelementptr i8, ptr %p, 
i64 16383 2371 %load = load volatile i8, ptr %gep, align 1 2372 store i8 %load, ptr undef 2373 ret void 2374} 2375 2376define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(ptr %p) { 2377; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: 2378; GFX9-SDAG: ; %bb.0: 2379; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2380; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2381; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0 2382; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 2383; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0 2384; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 2385; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc 2386; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2387; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 2388; GFX9-SDAG-NEXT: s_endpgm 2389; 2390; GFX10-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: 2391; GFX10: ; %bb.0: 2392; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2393; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2394; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000 2395; GFX10-NEXT: s_addc_u32 s1, s1, -1 2396; GFX10-NEXT: v_mov_b32_e32 v0, s0 2397; GFX10-NEXT: v_mov_b32_e32 v1, s1 2398; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 2399; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2400; GFX10-NEXT: flat_store_byte v[0:1], v0 2401; GFX10-NEXT: s_endpgm 2402; 2403; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: 2404; GFX11-SDAG: ; %bb.0: 2405; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2406; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2407; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0 2408; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2409; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 2410; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2411; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2412; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 2413; GFX11-SDAG-NEXT: s_endpgm 2414; 2415; GFX12-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: 2416; GFX12: ; %bb.0: 2417; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 
2418; GFX12-NEXT: s_wait_kmcnt 0x0 2419; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2420; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-4096 scope:SCOPE_SYS 2421; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2422; GFX12-NEXT: flat_store_b8 v[0:1], v0 2423; GFX12-NEXT: s_endpgm 2424; 2425; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: 2426; GFX9-GISEL: ; %bb.0: 2427; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2428; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2429; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000 2430; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1 2431; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 2432; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 2433; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 2434; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2435; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 2436; GFX9-GISEL-NEXT: s_endpgm 2437; 2438; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_neg_11bit_max: 2439; GFX11-GISEL: ; %bb.0: 2440; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2441; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2442; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000 2443; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1 2444; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2445; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2446; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2447; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2448; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 2449; GFX11-GISEL-NEXT: s_endpgm 2450 %gep = getelementptr i8, ptr %p, i64 -4096 2451 %load = load volatile i8, ptr %gep, align 1 2452 store i8 %load, ptr undef 2453 ret void 2454} 2455 2456define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(ptr %p) { 2457; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: 2458; GFX9-SDAG: ; %bb.0: 2459; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2460; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2461; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0 2462; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 2463; 
GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0 2464; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 2465; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc 2466; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2467; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 2468; GFX9-SDAG-NEXT: s_endpgm 2469; 2470; GFX10-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: 2471; GFX10: ; %bb.0: 2472; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2473; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2474; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000 2475; GFX10-NEXT: s_addc_u32 s1, s1, -1 2476; GFX10-NEXT: v_mov_b32_e32 v0, s0 2477; GFX10-NEXT: v_mov_b32_e32 v1, s1 2478; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 2479; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2480; GFX10-NEXT: flat_store_byte v[0:1], v0 2481; GFX10-NEXT: s_endpgm 2482; 2483; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: 2484; GFX11-SDAG: ; %bb.0: 2485; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2486; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2487; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0 2488; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2489; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 2490; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2491; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2492; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 2493; GFX11-SDAG-NEXT: s_endpgm 2494; 2495; GFX12-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: 2496; GFX12: ; %bb.0: 2497; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2498; GFX12-NEXT: s_wait_kmcnt 0x0 2499; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2500; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-8192 scope:SCOPE_SYS 2501; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2502; GFX12-NEXT: flat_store_b8 v[0:1], v0 2503; GFX12-NEXT: s_endpgm 2504; 2505; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: 2506; GFX9-GISEL: ; %bb.0: 2507; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2508; GFX9-GISEL-NEXT: s_waitcnt 
lgkmcnt(0) 2509; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000 2510; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1 2511; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 2512; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 2513; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 2514; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2515; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 2516; GFX9-GISEL-NEXT: s_endpgm 2517; 2518; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_neg_12bit_max: 2519; GFX11-GISEL: ; %bb.0: 2520; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2521; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2522; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000 2523; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1 2524; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2525; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2526; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2527; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2528; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 2529; GFX11-GISEL-NEXT: s_endpgm 2530 %gep = getelementptr i8, ptr %p, i64 -8192 2531 %load = load volatile i8, ptr %gep, align 1 2532 store i8 %load, ptr undef 2533 ret void 2534} 2535 2536define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(ptr %p) { 2537; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: 2538; GFX9-SDAG: ; %bb.0: 2539; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2540; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2541; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0 2542; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 2543; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0 2544; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc 2545; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc 2546; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2547; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 2548; GFX9-SDAG-NEXT: s_endpgm 2549; 2550; GFX10-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: 2551; GFX10: ; %bb.0: 2552; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2553; GFX10-NEXT: s_waitcnt 
lgkmcnt(0) 2554; GFX10-NEXT: s_add_u32 s0, s0, 0xffffc000 2555; GFX10-NEXT: s_addc_u32 s1, s1, -1 2556; GFX10-NEXT: v_mov_b32_e32 v0, s0 2557; GFX10-NEXT: v_mov_b32_e32 v1, s1 2558; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 2559; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2560; GFX10-NEXT: flat_store_byte v[0:1], v0 2561; GFX10-NEXT: s_endpgm 2562; 2563; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: 2564; GFX11-SDAG: ; %bb.0: 2565; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2566; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2567; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0 2568; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2569; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0 2570; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2571; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2572; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 2573; GFX11-SDAG-NEXT: s_endpgm 2574; 2575; GFX12-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: 2576; GFX12: ; %bb.0: 2577; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2578; GFX12-NEXT: s_wait_kmcnt 0x0 2579; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2580; GFX12-NEXT: flat_load_u8 v0, v[0:1] offset:-16384 scope:SCOPE_SYS 2581; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2582; GFX12-NEXT: flat_store_b8 v[0:1], v0 2583; GFX12-NEXT: s_endpgm 2584; 2585; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: 2586; GFX9-GISEL: ; %bb.0: 2587; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2588; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2589; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xffffc000 2590; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1 2591; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 2592; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 2593; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 2594; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2595; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 2596; GFX9-GISEL-NEXT: s_endpgm 2597; 2598; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_neg_13bit_max: 2599; GFX11-GISEL: ; 
%bb.0: 2600; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2601; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2602; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffc000 2603; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1 2604; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2605; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2606; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2607; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2608; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 2609; GFX11-GISEL-NEXT: s_endpgm 2610 %gep = getelementptr i8, ptr %p, i64 -16384 2611 %load = load volatile i8, ptr %gep, align 1 2612 store i8 %load, ptr undef 2613 ret void 2614} 2615 2616; Fill 11-bit low-bits (1ull << 33) | 2047 2617define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(ptr %p) { 2618; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split0: 2619; GFX9-SDAG: ; %bb.0: 2620; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2621; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2622; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 2623; GFX9-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 2624; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 2625; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 glc 2626; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2627; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 2628; GFX9-SDAG-NEXT: s_endpgm 2629; 2630; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split0: 2631; GFX10: ; %bb.0: 2632; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2633; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2634; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff 2635; GFX10-NEXT: s_addc_u32 s1, s1, 2 2636; GFX10-NEXT: v_mov_b32_e32 v0, s0 2637; GFX10-NEXT: v_mov_b32_e32 v1, s1 2638; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 2639; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2640; GFX10-NEXT: flat_store_byte v[0:1], v0 2641; GFX10-NEXT: s_endpgm 2642; 2643; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split0: 2644; GFX11-SDAG: ; %bb.0: 2645; GFX11-SDAG-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x24 2646; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2647; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2648; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2649; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2650; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc 2651; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2652; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 2653; GFX11-SDAG-NEXT: s_endpgm 2654; 2655; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split0: 2656; GFX12-SDAG: ; %bb.0: 2657; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2658; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 2659; GFX12-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2660; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2661; GFX12-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2662; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2047 scope:SCOPE_SYS 2663; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 2664; GFX12-SDAG-NEXT: flat_store_b8 v[0:1], v0 2665; GFX12-SDAG-NEXT: s_endpgm 2666; 2667; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split0: 2668; GFX9-GISEL: ; %bb.0: 2669; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2670; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2671; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x7ff 2672; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 2 2673; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 2674; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 2675; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 2676; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2677; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 2678; GFX9-GISEL-NEXT: s_endpgm 2679; 2680; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split0: 2681; GFX11-GISEL: ; %bb.0: 2682; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2683; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2684; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x7ff 2685; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2 2686; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2687; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 
s1 2688; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2689; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2690; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 2691; GFX11-GISEL-NEXT: s_endpgm 2692; 2693; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split0: 2694; GFX12-GISEL: ; %bb.0: 2695; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2696; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 2697; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x7ff 2698; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 2 2699; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2700; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2701; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] scope:SCOPE_SYS 2702; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 2703; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0 2704; GFX12-GISEL-NEXT: s_endpgm 2705 %gep = getelementptr i8, ptr %p, i64 8589936639 2706 %load = load volatile i8, ptr %gep, align 1 2707 store i8 %load, ptr undef 2708 ret void 2709} 2710 2711; Fill 11-bit low-bits (1ull << 33) | 2048 2712define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(ptr %p) { 2713; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split1: 2714; GFX9-SDAG: ; %bb.0: 2715; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2716; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2717; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 2718; GFX9-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 2719; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 2720; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 glc 2721; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2722; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 2723; GFX9-SDAG-NEXT: s_endpgm 2724; 2725; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split1: 2726; GFX10: ; %bb.0: 2727; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2728; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2729; GFX10-NEXT: s_add_u32 s0, s0, 0x800 2730; GFX10-NEXT: s_addc_u32 s1, s1, 2 2731; GFX10-NEXT: v_mov_b32_e32 v0, s0 2732; GFX10-NEXT: v_mov_b32_e32 v1, s1 2733; 
GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 2734; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2735; GFX10-NEXT: flat_store_byte v[0:1], v0 2736; GFX10-NEXT: s_endpgm 2737; 2738; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split1: 2739; GFX11-SDAG: ; %bb.0: 2740; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2741; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2742; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2743; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2744; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2745; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2048 glc dlc 2746; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2747; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 2748; GFX11-SDAG-NEXT: s_endpgm 2749; 2750; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split1: 2751; GFX12-SDAG: ; %bb.0: 2752; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2753; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 2754; GFX12-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2755; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2756; GFX12-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2757; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2048 scope:SCOPE_SYS 2758; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 2759; GFX12-SDAG-NEXT: flat_store_b8 v[0:1], v0 2760; GFX12-SDAG-NEXT: s_endpgm 2761; 2762; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split1: 2763; GFX9-GISEL: ; %bb.0: 2764; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2765; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2766; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x800 2767; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 2 2768; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 2769; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 2770; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 2771; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2772; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 2773; GFX9-GISEL-NEXT: s_endpgm 2774; 2775; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split1: 2776; GFX11-GISEL: ; %bb.0: 2777; GFX11-GISEL-NEXT: 
s_load_b64 s[0:1], s[4:5], 0x24 2778; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2779; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x800 2780; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2 2781; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2782; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2783; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2784; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2785; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 2786; GFX11-GISEL-NEXT: s_endpgm 2787; 2788; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split1: 2789; GFX12-GISEL: ; %bb.0: 2790; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2791; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 2792; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800 2793; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 2 2794; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2795; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2796; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] scope:SCOPE_SYS 2797; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 2798; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0 2799; GFX12-GISEL-NEXT: s_endpgm 2800 %gep = getelementptr i8, ptr %p, i64 8589936640 2801 %load = load volatile i8, ptr %gep, align 1 2802 store i8 %load, ptr undef 2803 ret void 2804} 2805 2806; Fill 12-bit low-bits (1ull << 33) | 4095 2807define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(ptr %p) { 2808; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split0: 2809; GFX9-SDAG: ; %bb.0: 2810; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2811; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2812; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 2813; GFX9-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0 2814; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 2815; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc 2816; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2817; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 2818; GFX9-SDAG-NEXT: s_endpgm 2819; 2820; GFX10-LABEL: 
flat_inst_salu_offset_64bit_12bit_split0: 2821; GFX10: ; %bb.0: 2822; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2823; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2824; GFX10-NEXT: s_add_u32 s0, s0, 0xfff 2825; GFX10-NEXT: s_addc_u32 s1, s1, 2 2826; GFX10-NEXT: v_mov_b32_e32 v0, s0 2827; GFX10-NEXT: v_mov_b32_e32 v1, s1 2828; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 2829; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2830; GFX10-NEXT: flat_store_byte v[0:1], v0 2831; GFX10-NEXT: s_endpgm 2832; 2833; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split0: 2834; GFX11-SDAG: ; %bb.0: 2835; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2836; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2837; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2838; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2839; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2840; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc 2841; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2842; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 2843; GFX11-SDAG-NEXT: s_endpgm 2844; 2845; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split0: 2846; GFX12-SDAG: ; %bb.0: 2847; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2848; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 2849; GFX12-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2850; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2851; GFX12-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2852; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 scope:SCOPE_SYS 2853; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 2854; GFX12-SDAG-NEXT: flat_store_b8 v[0:1], v0 2855; GFX12-SDAG-NEXT: s_endpgm 2856; 2857; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split0: 2858; GFX9-GISEL: ; %bb.0: 2859; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2860; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2861; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xfff 2862; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 2 2863; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 2864; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 
s1 2865; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 2866; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2867; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 2868; GFX9-GISEL-NEXT: s_endpgm 2869; 2870; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split0: 2871; GFX11-GISEL: ; %bb.0: 2872; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2873; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2874; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfff 2875; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2 2876; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2877; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2878; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2879; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2880; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 2881; GFX11-GISEL-NEXT: s_endpgm 2882; 2883; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split0: 2884; GFX12-GISEL: ; %bb.0: 2885; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2886; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 2887; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0xfff 2888; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 2 2889; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2890; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2891; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] scope:SCOPE_SYS 2892; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 2893; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0 2894; GFX12-GISEL-NEXT: s_endpgm 2895 %gep = getelementptr i8, ptr %p, i64 8589938687 2896 %load = load volatile i8, ptr %gep, align 1 2897 store i8 %load, ptr undef 2898 ret void 2899} 2900 2901; Fill 12-bit low-bits (1ull << 33) | 4096 2902define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(ptr %p) { 2903; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split1: 2904; GFX9-SDAG: ; %bb.0: 2905; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2906; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2907; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0 2908; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1 
2909; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0 2910; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc 2911; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc 2912; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2913; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0 2914; GFX9-SDAG-NEXT: s_endpgm 2915; 2916; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split1: 2917; GFX10: ; %bb.0: 2918; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2919; GFX10-NEXT: s_waitcnt lgkmcnt(0) 2920; GFX10-NEXT: s_add_u32 s0, s0, 0x1000 2921; GFX10-NEXT: s_addc_u32 s1, s1, 2 2922; GFX10-NEXT: v_mov_b32_e32 v0, s0 2923; GFX10-NEXT: v_mov_b32_e32 v1, s1 2924; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc 2925; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2926; GFX10-NEXT: flat_store_byte v[0:1], v0 2927; GFX10-NEXT: s_endpgm 2928; 2929; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split1: 2930; GFX11-SDAG: ; %bb.0: 2931; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2932; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2933; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0 2934; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2935; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2936; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2937; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2938; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0 2939; GFX11-SDAG-NEXT: s_endpgm 2940; 2941; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split1: 2942; GFX12-SDAG: ; %bb.0: 2943; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2944; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 2945; GFX12-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0 2946; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2947; GFX12-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0 2948; GFX12-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4096 scope:SCOPE_SYS 2949; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 2950; GFX12-SDAG-NEXT: flat_store_b8 v[0:1], v0 2951; GFX12-SDAG-NEXT: s_endpgm 2952; 2953; GFX9-GISEL-LABEL: 
flat_inst_salu_offset_64bit_12bit_split1: 2954; GFX9-GISEL: ; %bb.0: 2955; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2956; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2957; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x1000 2958; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 2 2959; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0 2960; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1 2961; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc 2962; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2963; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0 2964; GFX9-GISEL-NEXT: s_endpgm 2965; 2966; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split1: 2967; GFX11-GISEL: ; %bb.0: 2968; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2969; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2970; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1000 2971; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2 2972; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2973; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2974; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc 2975; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 2976; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0 2977; GFX11-GISEL-NEXT: s_endpgm 2978; 2979; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split1: 2980; GFX12-GISEL: ; %bb.0: 2981; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2982; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 2983; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x1000 2984; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 2 2985; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2986; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 2987; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1] scope:SCOPE_SYS 2988; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0 2989; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0 2990; GFX12-GISEL-NEXT: s_endpgm 2991 %gep = getelementptr i8, ptr %p, i64 8589938688 2992 %load = load volatile i8, ptr %gep, align 1 2993 store i8 %load, ptr undef 2994 ret void 2995} 2996 2997; Fill 13-bit low-bits (1ull << 33) | 8191 2998define 
amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(ptr %p) {
; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] offset:4095 glc
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
; GFX9-SDAG-NEXT:    s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX10-NEXT:    s_addc_u32 s1, s1, 2
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_byte v[0:1], v0
; GFX10-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:4095 glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:8191 scope:SCOPE_SYS
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 2
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
; GFX9-GISEL-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x1fff
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-GISEL-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr %p, i64 8589942783
  %load = load volatile i8, ptr %gep, align 1
  store i8 %load, ptr undef
  ret void
}

; Fill 13-bit low-bits (1ull << 33) | 8192
; The GEP constant is 8589942784 = 2^33 + 8192. In the checks below only the
; gfx1200 SelectionDAG run keeps an immediate on the load (offset:8192, with 0
; added to the low dword and 2 to the high dword); every other run adds
; 0x2000 / 2 to the pointer halves and loads with no immediate offset.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(ptr %p) {
; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
; GFX9-SDAG-NEXT:    s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX10-NEXT:    s_addc_u32 s1, s1, 2
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_byte v[0:1], v0
; GFX10-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, s0, 0x2000, s0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, s0, 0, s0
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:8192 scope:SCOPE_SYS
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 2
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
; GFX9-GISEL-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 2
; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x2000
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 2
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-GISEL-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr %p, i64 8589942784
  %load = load volatile i8, ptr %gep, align 1
  store i8 %load, ptr undef
  ret void
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; The GEP constant is INT64_MIN + 2047. In the checks below only the gfx1200
; SelectionDAG run splits it: 0x800000 is added to the low dword and the load
; carries offset:-8386561 (0x800000 - 8386561 = 2047). Every other run adds
; the full constant to the pointer and loads with no immediate offset.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(ptr %p) {
; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x7ff, v0
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
; GFX9-SDAG-NEXT:    s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x7ff
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_byte v[0:1], v0
; GFX10-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x7ff, s0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8386561 scope:SCOPE_SYS
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x7ff
; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
; GFX9-GISEL-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x7ff
; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x7ff
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-GISEL-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr %p, i64 -9223372036854773761
  %load = load volatile i8, ptr %gep, align 1
  store i8 %load, ptr undef
  ret void
}

; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; The GEP constant is INT64_MIN + 2048. In the checks below only the gfx1200
; SelectionDAG run splits it: 0x800000 is added to the low dword and the load
; carries offset:-8386560 (0x800000 - 8386560 = 2048). Every other run adds
; the full constant to the pointer and loads with no immediate offset.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(ptr %p) {
; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x800, v0
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
; GFX9-SDAG-NEXT:    s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x800
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_byte v[0:1], v0
; GFX10-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, s0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8386560 scope:SCOPE_SYS
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x800
; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
; GFX9-GISEL-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x800
; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x800
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-GISEL-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr %p, i64 -9223372036854773760
  %load = load volatile i8, ptr %gep, align 1
  store i8 %load, ptr undef
  ret void
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; The GEP constant below is INT64_MIN + 4095. In the checks only the gfx1200
; SelectionDAG run splits it: 0x800000 is added to the low dword and the load
; carries offset:-8384513 (0x800000 - 8384513 = 4095). Every other run adds
; the full constant to the pointer and loads with no immediate offset.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(ptr %p) {
; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0xfff, v0
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
; GFX9-SDAG-NEXT:    s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0xfff
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_byte v[0:1], v0
; GFX10-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0xfff, s0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8384513 scope:SCOPE_SYS
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0xfff
; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
; GFX9-GISEL-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0xfff
; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0xfff
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-GISEL-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr %p, i64 -9223372036854771713
  %load = load volatile i8, ptr %gep, align 1
  store i8 %load, ptr undef
  ret void
}

; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; The GEP constant is INT64_MIN + 4096. In the checks below only the gfx1200
; SelectionDAG run splits it: 0x800000 is added to the low dword and the load
; carries offset:-8384512 (0x800000 - 8384512 = 4096). Every other run adds
; the full constant to the pointer and loads with no immediate offset.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(ptr %p) {
; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
; GFX9-SDAG-NEXT:    s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x1000
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_byte v[0:1], v0
; GFX10-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, s0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8384512 scope:SCOPE_SYS
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x1000
; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
; GFX9-GISEL-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x1000
; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x1000
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-GISEL-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr %p, i64 -9223372036854771712
  %load = load volatile i8, ptr %gep, align 1
  store i8 %load, ptr undef
  ret void
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; The GEP constant is INT64_MIN + 8191. In the checks below only the gfx1200
; SelectionDAG run splits it: 0x800000 is added to the low dword and the load
; carries offset:-8380417 (0x800000 - 8380417 = 8191). Every other run adds
; the full constant to the pointer and loads with no immediate offset.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(ptr %p) {
; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1fff, v0
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
; GFX9-SDAG-NEXT:    s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_byte v[0:1], v0
; GFX10-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1fff, s0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8380417 scope:SCOPE_SYS
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
; GFX9-GISEL-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x1fff
; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x1fff
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-GISEL-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr %p, i64 -9223372036854767617
  %load = load volatile i8, ptr %gep, align 1
  store i8 %load, ptr undef
  ret void
}

; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; The GEP constant is INT64_MIN + 8192. In the checks below only the gfx1200
; SelectionDAG run splits it: 0x800000 is added to the low dword and the load
; carries offset:-8380416 (0x800000 - 8380416 = 8192). Every other run adds
; the full constant to the pointer and loads with no immediate offset.
define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(ptr %p) {
; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-SDAG-NEXT:    v_bfrev_b32_e32 v1, 1
; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s1
; GFX9-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, 0x2000, v0
; GFX9-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-SDAG-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    flat_store_byte v[0:1], v0
; GFX9-SDAG-NEXT:    s_endpgm
;
; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX10-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX10-NEXT:    v_mov_b32_e32 v0, s0
; GFX10-NEXT:    v_mov_b32_e32 v1, s1
; GFX10-NEXT:    flat_load_ubyte v0, v[0:1] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_byte v[0:1], v0
; GFX10-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX11-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x2000, s0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX11-SDAG-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX12-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX12-SDAG-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800000, s0
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-SDAG-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
; GFX12-SDAG-NEXT:    flat_load_u8 v0, v[0:1] offset:-8380416 scope:SCOPE_SYS
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX9-GISEL-NEXT:    flat_load_ubyte v0, v[0:1] glc
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    flat_store_byte v[0:1], v0
; GFX9-GISEL-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_add_u32 s0, s0, 0x2000
; GFX11-GISEL-NEXT:    s_addc_u32 s1, s1, 0x80000000
; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-GISEL-NEXT:    flat_load_u8 v0, v[0:1] glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x2000
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0x80000000
; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-GISEL-NEXT:    flat_load_u8 v0, v[0:1] scope:SCOPE_SYS
; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT:    flat_store_b8 v[0:1], v0
; GFX12-GISEL-NEXT:    s_endpgm
  %gep = getelementptr i8, ptr %p, i64 -9223372036854767616
  %load = load volatile i8, ptr %gep, align 1
  store i8 %load, ptr undef
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10-GISEL: {{.*}}
; GFX10-SDAG: {{.*}}