1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX12-UNALIGNED %s 3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX12,GFX12-NOUNALIGNED %s 4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-UNALIGNED %s 5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-NOUNALIGNED %s 6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-UNALIGNED %s 7; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GCN,GFX7,GFX7-NOUNALIGNED %s 8 9; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s 10 11define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) { 12; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align1: 13; GFX12-UNALIGNED: ; %bb.0: 14; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 15; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0 16; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0 17; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0 18; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0 19; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off 20; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0 21; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31] 22; 23; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1: 24; GFX12-NOUNALIGNED: ; %bb.0: 25; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 26; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0 27; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0 28; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0 29; GFX12-NOUNALIGNED-NEXT: 
s_wait_kmcnt 0x0 30; GFX12-NOUNALIGNED-NEXT: s_clause 0xb 31; GFX12-NOUNALIGNED-NEXT: global_load_u8 v2, v[0:1], off 32; GFX12-NOUNALIGNED-NEXT: global_load_u8 v3, v[0:1], off offset:1 33; GFX12-NOUNALIGNED-NEXT: global_load_u8 v4, v[0:1], off offset:2 34; GFX12-NOUNALIGNED-NEXT: global_load_u8 v5, v[0:1], off offset:3 35; GFX12-NOUNALIGNED-NEXT: global_load_u8 v6, v[0:1], off offset:4 36; GFX12-NOUNALIGNED-NEXT: global_load_u8 v7, v[0:1], off offset:5 37; GFX12-NOUNALIGNED-NEXT: global_load_u8 v8, v[0:1], off offset:6 38; GFX12-NOUNALIGNED-NEXT: global_load_u8 v9, v[0:1], off offset:7 39; GFX12-NOUNALIGNED-NEXT: global_load_u8 v10, v[0:1], off offset:8 40; GFX12-NOUNALIGNED-NEXT: global_load_u8 v11, v[0:1], off offset:9 41; GFX12-NOUNALIGNED-NEXT: global_load_u8 v12, v[0:1], off offset:11 42; GFX12-NOUNALIGNED-NEXT: global_load_u8 v0, v[0:1], off offset:10 43; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0xa 44; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v3, 8, v2 45; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x9 46; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v4 47; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x8 48; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v5 49; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x6 50; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v4, v7, 8, v6 51; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x5 52; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v8 53; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x4 54; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 24, v9 55; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x2 56; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v7, v11, 8, v10 57; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x1 58; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v12 59; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0 60; GFX12-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 16, v0 61; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v0, v2, v3, v1 62; GFX12-NOUNALIGNED-NEXT: v_or3_b32 v1, v5, v6, v4 63; GFX12-NOUNALIGNED-NEXT: s_delay_alu instid0(VALU_DEP_3) 64; GFX12-NOUNALIGNED-NEXT: 
v_or3_b32 v2, v8, v9, v7 65; GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] 66; 67; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1: 68; GFX9-UNALIGNED: ; %bb.0: 69; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 70; GFX9-UNALIGNED-NEXT: global_load_dwordx3 v[0:2], v[0:1], off 71; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) 72; GFX9-UNALIGNED-NEXT: s_setpc_b64 s[30:31] 73; 74; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1: 75; GFX9-NOUNALIGNED: ; %bb.0: 76; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 77; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v[0:1], off 78; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v3, v[0:1], off offset:1 79; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v[0:1], off offset:2 80; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v5, v[0:1], off offset:3 81; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v6, v[0:1], off offset:4 82; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v7, v[0:1], off offset:5 83; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v8, v[0:1], off offset:6 84; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v9, v[0:1], off offset:7 85; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v[0:1], off offset:8 86; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v[0:1], off offset:9 87; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v[0:1], off offset:11 88; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v13, v[0:1], off offset:10 89; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) 90; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v3, 8, v2 91; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) 92; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v4 93; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8) 94; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 24, v5 95; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v1, v2, v0 96; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) 97; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v3, v7, 8, v6 98; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) 99; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v8 100; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) 101; 
GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 24, v9 102; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v4, v5, v3 103; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) 104; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v6, v11, 8, v10 105; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) 106; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v12 107; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) 108; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 16, v13 109; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v7, v8, v6 110; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] 111; 112; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align1: 113; GFX7-UNALIGNED: ; %bb.0: 114; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 115; GFX7-UNALIGNED-NEXT: s_mov_b32 s6, 0 116; GFX7-UNALIGNED-NEXT: s_mov_b32 s7, 0xf000 117; GFX7-UNALIGNED-NEXT: s_mov_b64 s[4:5], 0 118; GFX7-UNALIGNED-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 119; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) 120; GFX7-UNALIGNED-NEXT: s_setpc_b64 s[30:31] 121; 122; GFX7-NOUNALIGNED-LABEL: v_load_constant_v3i32_align1: 123; GFX7-NOUNALIGNED: ; %bb.0: 124; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 125; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s6, 0 126; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s7, 0xf000 127; GFX7-NOUNALIGNED-NEXT: s_mov_b64 s[4:5], 0 128; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v2, v[0:1], s[4:7], 0 addr64 offset:1 129; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v3, v[0:1], s[4:7], 0 addr64 offset:3 130; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v4, v[0:1], s[4:7], 0 addr64 offset:2 131; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v5, v[0:1], s[4:7], 0 addr64 offset:5 132; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v6, v[0:1], s[4:7], 0 addr64 offset:7 133; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v7, v[0:1], s[4:7], 0 addr64 offset:6 134; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v8, v[0:1], s[4:7], 0 addr64 offset:9 135; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v9, v[0:1], s[4:7], 0 addr64 offset:11 136; GFX7-NOUNALIGNED-NEXT: 
buffer_load_ubyte v10, v[0:1], s[4:7], 0 addr64 offset:10 137; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v11, v[0:1], s[4:7], 0 addr64 138; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v12, v[0:1], s[4:7], 0 addr64 offset:4 139; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:8 140; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(11) 141; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 8, v2 142; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) 143; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 24, v3 144; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) 145; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 16, v4 146; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8) 147; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 8, v5 148; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(7) 149; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 24, v6 150; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) 151; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 16, v7 152; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) 153; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 8, v8 154; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) 155; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 24, v9 156; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) 157; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v9, 16, v10 158; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) 159; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v11 160; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v2, v3 161; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) 162; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v3, v4, v12 163; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v4, v5, v6 164; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) 165; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v5, v7, v0 166; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v6, v8, v9 167; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v2, v1 168; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v4, v3 169; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v6, v5 170; GFX7-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] 171; 172; GFX6-LABEL: v_load_constant_v3i32_align1: 173; GFX6: ; %bb.0: 174; GFX6-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 175; GFX6-NEXT: s_mov_b32 s6, 0 176; GFX6-NEXT: s_mov_b32 s7, 0xf000 177; GFX6-NEXT: s_mov_b64 s[4:5], 0 178; GFX6-NEXT: buffer_load_ubyte v2, v[0:1], s[4:7], 0 addr64 offset:1 179; GFX6-NEXT: buffer_load_ubyte v3, v[0:1], s[4:7], 0 addr64 offset:3 180; GFX6-NEXT: buffer_load_ubyte v4, v[0:1], s[4:7], 0 addr64 offset:2 181; GFX6-NEXT: buffer_load_ubyte v5, v[0:1], s[4:7], 0 addr64 offset:5 182; GFX6-NEXT: buffer_load_ubyte v6, v[0:1], s[4:7], 0 addr64 offset:7 183; GFX6-NEXT: buffer_load_ubyte v7, v[0:1], s[4:7], 0 addr64 offset:6 184; GFX6-NEXT: buffer_load_ubyte v8, v[0:1], s[4:7], 0 addr64 offset:9 185; GFX6-NEXT: buffer_load_ubyte v9, v[0:1], s[4:7], 0 addr64 offset:11 186; GFX6-NEXT: buffer_load_ubyte v10, v[0:1], s[4:7], 0 addr64 offset:10 187; GFX6-NEXT: buffer_load_ubyte v11, v[0:1], s[4:7], 0 addr64 188; GFX6-NEXT: buffer_load_ubyte v12, v[0:1], s[4:7], 0 addr64 offset:4 189; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 offset:8 190; GFX6-NEXT: s_waitcnt vmcnt(11) 191; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v2 192; GFX6-NEXT: s_waitcnt vmcnt(10) 193; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v3 194; GFX6-NEXT: s_waitcnt vmcnt(9) 195; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v4 196; GFX6-NEXT: s_waitcnt vmcnt(8) 197; GFX6-NEXT: v_lshlrev_b32_e32 v4, 8, v5 198; GFX6-NEXT: s_waitcnt vmcnt(7) 199; GFX6-NEXT: v_lshlrev_b32_e32 v5, 24, v6 200; GFX6-NEXT: s_waitcnt vmcnt(6) 201; GFX6-NEXT: v_lshlrev_b32_e32 v6, 16, v7 202; GFX6-NEXT: s_waitcnt vmcnt(5) 203; GFX6-NEXT: v_lshlrev_b32_e32 v7, 8, v8 204; GFX6-NEXT: s_waitcnt vmcnt(4) 205; GFX6-NEXT: v_lshlrev_b32_e32 v8, 24, v9 206; GFX6-NEXT: s_waitcnt vmcnt(3) 207; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v10 208; GFX6-NEXT: s_waitcnt vmcnt(2) 209; GFX6-NEXT: v_or_b32_e32 v1, v1, v11 210; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 211; GFX6-NEXT: s_waitcnt vmcnt(1) 212; GFX6-NEXT: v_or_b32_e32 v3, v4, v12 213; GFX6-NEXT: v_or_b32_e32 v4, v5, v6 214; GFX6-NEXT: s_waitcnt vmcnt(0) 215; GFX6-NEXT: 
v_or_b32_e32 v5, v7, v0 216; GFX6-NEXT: v_or_b32_e32 v6, v8, v9 217; GFX6-NEXT: v_or_b32_e32 v0, v2, v1 218; GFX6-NEXT: v_or_b32_e32 v1, v4, v3 219; GFX6-NEXT: v_or_b32_e32 v2, v6, v5 220; GFX6-NEXT: s_setpc_b64 s[30:31] 221 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 1 222 ret <3 x i32> %load 223} 224 225define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) { 226; GFX12-UNALIGNED-LABEL: v_load_constant_v3i32_align2: 227; GFX12-UNALIGNED: ; %bb.0: 228; GFX12-UNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 229; GFX12-UNALIGNED-NEXT: s_wait_expcnt 0x0 230; GFX12-UNALIGNED-NEXT: s_wait_samplecnt 0x0 231; GFX12-UNALIGNED-NEXT: s_wait_bvhcnt 0x0 232; GFX12-UNALIGNED-NEXT: s_wait_kmcnt 0x0 233; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v[0:1], off 234; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0 235; GFX12-UNALIGNED-NEXT: s_setpc_b64 s[30:31] 236; 237; GFX12-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2: 238; GFX12-NOUNALIGNED: ; %bb.0: 239; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt_dscnt 0x0 240; GFX12-NOUNALIGNED-NEXT: s_wait_expcnt 0x0 241; GFX12-NOUNALIGNED-NEXT: s_wait_samplecnt 0x0 242; GFX12-NOUNALIGNED-NEXT: s_wait_bvhcnt 0x0 243; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 244; GFX12-NOUNALIGNED-NEXT: s_clause 0x5 245; GFX12-NOUNALIGNED-NEXT: global_load_u16 v2, v[0:1], off 246; GFX12-NOUNALIGNED-NEXT: global_load_u16 v3, v[0:1], off offset:2 247; GFX12-NOUNALIGNED-NEXT: global_load_u16 v4, v[0:1], off offset:4 248; GFX12-NOUNALIGNED-NEXT: global_load_u16 v5, v[0:1], off offset:6 249; GFX12-NOUNALIGNED-NEXT: global_load_u16 v6, v[0:1], off offset:8 250; GFX12-NOUNALIGNED-NEXT: global_load_u16 v7, v[0:1], off offset:10 251; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x4 252; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 253; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x2 254; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v5, 16, v4 255; GFX12-NOUNALIGNED-NEXT: s_wait_loadcnt 0x0 256; GFX12-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v7, 16, v6 257; 
GFX12-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] 258; 259; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align2: 260; GFX9-UNALIGNED: ; %bb.0: 261; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 262; GFX9-UNALIGNED-NEXT: global_load_dwordx3 v[0:2], v[0:1], off 263; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) 264; GFX9-UNALIGNED-NEXT: s_setpc_b64 s[30:31] 265; 266; GFX9-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2: 267; GFX9-NOUNALIGNED: ; %bb.0: 268; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 269; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v[0:1], off 270; GFX9-NOUNALIGNED-NEXT: global_load_ushort v3, v[0:1], off offset:2 271; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v[0:1], off offset:4 272; GFX9-NOUNALIGNED-NEXT: global_load_ushort v5, v[0:1], off offset:6 273; GFX9-NOUNALIGNED-NEXT: global_load_ushort v6, v[0:1], off offset:8 274; GFX9-NOUNALIGNED-NEXT: global_load_ushort v7, v[0:1], off offset:10 275; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) 276; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v3, 16, v2 277; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) 278; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v5, 16, v4 279; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) 280; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v7, 16, v6 281; GFX9-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] 282; 283; GFX7-UNALIGNED-LABEL: v_load_constant_v3i32_align2: 284; GFX7-UNALIGNED: ; %bb.0: 285; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 286; GFX7-UNALIGNED-NEXT: s_mov_b32 s6, 0 287; GFX7-UNALIGNED-NEXT: s_mov_b32 s7, 0xf000 288; GFX7-UNALIGNED-NEXT: s_mov_b64 s[4:5], 0 289; GFX7-UNALIGNED-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 290; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) 291; GFX7-UNALIGNED-NEXT: s_setpc_b64 s[30:31] 292; 293; GFX7-NOUNALIGNED-LABEL: v_load_constant_v3i32_align2: 294; GFX7-NOUNALIGNED: ; %bb.0: 295; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 296; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s6, 0 297; 
GFX7-NOUNALIGNED-NEXT: s_mov_b32 s7, 0xf000 298; GFX7-NOUNALIGNED-NEXT: s_mov_b64 s[4:5], 0 299; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:2 300; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:6 301; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:10 302; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 303; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:4 304; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v7, v[0:1], s[4:7], 0 addr64 offset:8 305; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) 306; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v2 307; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) 308; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v3 309; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) 310; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v4 311; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) 312; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v5 313; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) 314; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v6 315; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) 316; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v2, v7 317; GFX7-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] 318; 319; GFX6-LABEL: v_load_constant_v3i32_align2: 320; GFX6: ; %bb.0: 321; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 322; GFX6-NEXT: s_mov_b32 s6, 0 323; GFX6-NEXT: s_mov_b32 s7, 0xf000 324; GFX6-NEXT: s_mov_b64 s[4:5], 0 325; GFX6-NEXT: buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:2 326; GFX6-NEXT: buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:6 327; GFX6-NEXT: buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:10 328; GFX6-NEXT: buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 329; GFX6-NEXT: buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:4 330; GFX6-NEXT: buffer_load_ushort v7, v[0:1], s[4:7], 0 addr64 offset:8 331; GFX6-NEXT: s_waitcnt vmcnt(5) 332; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v2 333; 
GFX6-NEXT: s_waitcnt vmcnt(4) 334; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v3 335; GFX6-NEXT: s_waitcnt vmcnt(3) 336; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v4 337; GFX6-NEXT: s_waitcnt vmcnt(2) 338; GFX6-NEXT: v_or_b32_e32 v0, v0, v5 339; GFX6-NEXT: s_waitcnt vmcnt(1) 340; GFX6-NEXT: v_or_b32_e32 v1, v1, v6 341; GFX6-NEXT: s_waitcnt vmcnt(0) 342; GFX6-NEXT: v_or_b32_e32 v2, v2, v7 343; GFX6-NEXT: s_setpc_b64 s[30:31] 344 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 2 345 ret <3 x i32> %load 346} 347 348define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) { 349; GFX12-LABEL: v_load_constant_v3i32_align4: 350; GFX12: ; %bb.0: 351; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 352; GFX12-NEXT: s_wait_expcnt 0x0 353; GFX12-NEXT: s_wait_samplecnt 0x0 354; GFX12-NEXT: s_wait_bvhcnt 0x0 355; GFX12-NEXT: s_wait_kmcnt 0x0 356; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off 357; GFX12-NEXT: s_wait_loadcnt 0x0 358; GFX12-NEXT: s_setpc_b64 s[30:31] 359; 360; GFX9-LABEL: v_load_constant_v3i32_align4: 361; GFX9: ; %bb.0: 362; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 363; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off 364; GFX9-NEXT: s_waitcnt vmcnt(0) 365; GFX9-NEXT: s_setpc_b64 s[30:31] 366; 367; GFX7-LABEL: v_load_constant_v3i32_align4: 368; GFX7: ; %bb.0: 369; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 370; GFX7-NEXT: s_mov_b32 s6, 0 371; GFX7-NEXT: s_mov_b32 s7, 0xf000 372; GFX7-NEXT: s_mov_b64 s[4:5], 0 373; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 374; GFX7-NEXT: s_waitcnt vmcnt(0) 375; GFX7-NEXT: s_setpc_b64 s[30:31] 376; 377; GFX6-LABEL: v_load_constant_v3i32_align4: 378; GFX6: ; %bb.0: 379; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 380; GFX6-NEXT: s_mov_b32 s6, 0 381; GFX6-NEXT: s_mov_b32 s7, 0xf000 382; GFX6-NEXT: s_mov_b64 s[4:5], 0 383; GFX6-NEXT: buffer_load_dwordx2 v[3:4], v[0:1], s[4:7], 0 addr64 384; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 offset:8 385; GFX6-NEXT: s_waitcnt 
vmcnt(1) 386; GFX6-NEXT: v_mov_b32_e32 v0, v3 387; GFX6-NEXT: v_mov_b32_e32 v1, v4 388; GFX6-NEXT: s_waitcnt vmcnt(0) 389; GFX6-NEXT: s_setpc_b64 s[30:31] 390 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 4 391 ret <3 x i32> %load 392} 393 394define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) { 395; GFX12-LABEL: v_load_constant_i96_align8: 396; GFX12: ; %bb.0: 397; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 398; GFX12-NEXT: s_wait_expcnt 0x0 399; GFX12-NEXT: s_wait_samplecnt 0x0 400; GFX12-NEXT: s_wait_bvhcnt 0x0 401; GFX12-NEXT: s_wait_kmcnt 0x0 402; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off 403; GFX12-NEXT: s_wait_loadcnt 0x0 404; GFX12-NEXT: s_setpc_b64 s[30:31] 405; 406; GFX9-LABEL: v_load_constant_i96_align8: 407; GFX9: ; %bb.0: 408; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 409; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off 410; GFX9-NEXT: s_waitcnt vmcnt(0) 411; GFX9-NEXT: s_setpc_b64 s[30:31] 412; 413; GFX7-LABEL: v_load_constant_i96_align8: 414; GFX7: ; %bb.0: 415; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 416; GFX7-NEXT: s_mov_b32 s6, 0 417; GFX7-NEXT: s_mov_b32 s7, 0xf000 418; GFX7-NEXT: s_mov_b64 s[4:5], 0 419; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 420; GFX7-NEXT: s_waitcnt vmcnt(0) 421; GFX7-NEXT: s_setpc_b64 s[30:31] 422; 423; GFX6-LABEL: v_load_constant_i96_align8: 424; GFX6: ; %bb.0: 425; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 426; GFX6-NEXT: s_mov_b32 s6, 0 427; GFX6-NEXT: s_mov_b32 s7, 0xf000 428; GFX6-NEXT: s_mov_b64 s[4:5], 0 429; GFX6-NEXT: buffer_load_dwordx2 v[3:4], v[0:1], s[4:7], 0 addr64 430; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 offset:8 431; GFX6-NEXT: s_waitcnt vmcnt(1) 432; GFX6-NEXT: v_mov_b32_e32 v0, v3 433; GFX6-NEXT: v_mov_b32_e32 v1, v4 434; GFX6-NEXT: s_waitcnt vmcnt(0) 435; GFX6-NEXT: s_setpc_b64 s[30:31] 436 %load = load i96, ptr addrspace(4) %ptr, align 8 437 ret i96 %load 438} 439 440define <3 x i32> 
@v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) { 441; GFX12-LABEL: v_load_constant_v3i32_align8: 442; GFX12: ; %bb.0: 443; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 444; GFX12-NEXT: s_wait_expcnt 0x0 445; GFX12-NEXT: s_wait_samplecnt 0x0 446; GFX12-NEXT: s_wait_bvhcnt 0x0 447; GFX12-NEXT: s_wait_kmcnt 0x0 448; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off 449; GFX12-NEXT: s_wait_loadcnt 0x0 450; GFX12-NEXT: s_setpc_b64 s[30:31] 451; 452; GFX9-LABEL: v_load_constant_v3i32_align8: 453; GFX9: ; %bb.0: 454; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 455; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off 456; GFX9-NEXT: s_waitcnt vmcnt(0) 457; GFX9-NEXT: s_setpc_b64 s[30:31] 458; 459; GFX7-LABEL: v_load_constant_v3i32_align8: 460; GFX7: ; %bb.0: 461; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 462; GFX7-NEXT: s_mov_b32 s6, 0 463; GFX7-NEXT: s_mov_b32 s7, 0xf000 464; GFX7-NEXT: s_mov_b64 s[4:5], 0 465; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 466; GFX7-NEXT: s_waitcnt vmcnt(0) 467; GFX7-NEXT: s_setpc_b64 s[30:31] 468; 469; GFX6-LABEL: v_load_constant_v3i32_align8: 470; GFX6: ; %bb.0: 471; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 472; GFX6-NEXT: s_mov_b32 s6, 0 473; GFX6-NEXT: s_mov_b32 s7, 0xf000 474; GFX6-NEXT: s_mov_b64 s[4:5], 0 475; GFX6-NEXT: buffer_load_dwordx2 v[3:4], v[0:1], s[4:7], 0 addr64 476; GFX6-NEXT: buffer_load_dword v2, v[0:1], s[4:7], 0 addr64 offset:8 477; GFX6-NEXT: s_waitcnt vmcnt(1) 478; GFX6-NEXT: v_mov_b32_e32 v0, v3 479; GFX6-NEXT: v_mov_b32_e32 v1, v4 480; GFX6-NEXT: s_waitcnt vmcnt(0) 481; GFX6-NEXT: s_setpc_b64 s[30:31] 482 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 8 483 ret <3 x i32> %load 484} 485 486define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) { 487; GFX12-LABEL: v_load_constant_v6i16_align8: 488; GFX12: ; %bb.0: 489; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 490; GFX12-NEXT: s_wait_expcnt 0x0 491; GFX12-NEXT: s_wait_samplecnt 0x0 492; GFX12-NEXT: 
s_wait_bvhcnt 0x0 493; GFX12-NEXT: s_wait_kmcnt 0x0 494; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off 495; GFX12-NEXT: s_wait_loadcnt 0x0 496; GFX12-NEXT: s_setpc_b64 s[30:31] 497; 498; GFX9-LABEL: v_load_constant_v6i16_align8: 499; GFX9: ; %bb.0: 500; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 501; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off 502; GFX9-NEXT: s_waitcnt vmcnt(0) 503; GFX9-NEXT: s_setpc_b64 s[30:31] 504; 505; GFX7-LABEL: v_load_constant_v6i16_align8: 506; GFX7: ; %bb.0: 507; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 508; GFX7-NEXT: s_mov_b32 s6, 0 509; GFX7-NEXT: s_mov_b32 s7, 0xf000 510; GFX7-NEXT: s_mov_b64 s[4:5], 0 511; GFX7-NEXT: buffer_load_dwordx3 v[6:8], v[0:1], s[4:7], 0 addr64 512; GFX7-NEXT: s_waitcnt vmcnt(0) 513; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v6 514; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v7 515; GFX7-NEXT: v_lshrrev_b32_e32 v5, 16, v8 516; GFX7-NEXT: v_mov_b32_e32 v0, v6 517; GFX7-NEXT: v_mov_b32_e32 v2, v7 518; GFX7-NEXT: v_mov_b32_e32 v4, v8 519; GFX7-NEXT: s_setpc_b64 s[30:31] 520; 521; GFX6-LABEL: v_load_constant_v6i16_align8: 522; GFX6: ; %bb.0: 523; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 524; GFX6-NEXT: s_mov_b32 s6, 0 525; GFX6-NEXT: s_mov_b32 s7, 0xf000 526; GFX6-NEXT: s_mov_b64 s[4:5], 0 527; GFX6-NEXT: buffer_load_dwordx2 v[6:7], v[0:1], s[4:7], 0 addr64 528; GFX6-NEXT: buffer_load_dword v4, v[0:1], s[4:7], 0 addr64 offset:8 529; GFX6-NEXT: s_waitcnt vmcnt(1) 530; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v6 531; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v7 532; GFX6-NEXT: s_waitcnt vmcnt(0) 533; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4 534; GFX6-NEXT: v_mov_b32_e32 v0, v6 535; GFX6-NEXT: v_mov_b32_e32 v2, v7 536; GFX6-NEXT: s_setpc_b64 s[30:31] 537 %load = load <6 x i16>, ptr addrspace(4) %ptr, align 8 538 ret <6 x i16> %load 539} 540 541define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) { 542; GFX12-LABEL: v_load_constant_v12i8_align8: 543; GFX12: ; %bb.0: 544; GFX12-NEXT: 
s_wait_loadcnt_dscnt 0x0 545; GFX12-NEXT: s_wait_expcnt 0x0 546; GFX12-NEXT: s_wait_samplecnt 0x0 547; GFX12-NEXT: s_wait_bvhcnt 0x0 548; GFX12-NEXT: s_wait_kmcnt 0x0 549; GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off 550; GFX12-NEXT: s_wait_loadcnt 0x0 551; GFX12-NEXT: v_lshrrev_b32_e32 v13, 8, v0 552; GFX12-NEXT: v_lshrrev_b32_e32 v12, 16, v0 553; GFX12-NEXT: v_lshrrev_b32_e32 v3, 24, v0 554; GFX12-NEXT: v_lshrrev_b32_e32 v5, 8, v1 555; GFX12-NEXT: v_lshrrev_b32_e32 v6, 16, v1 556; GFX12-NEXT: v_lshrrev_b32_e32 v7, 24, v1 557; GFX12-NEXT: v_lshrrev_b32_e32 v9, 8, v2 558; GFX12-NEXT: v_lshrrev_b32_e32 v10, 16, v2 559; GFX12-NEXT: v_lshrrev_b32_e32 v11, 24, v2 560; GFX12-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v13 561; GFX12-NEXT: v_mov_b32_e32 v8, v2 562; GFX12-NEXT: v_mov_b32_e32 v2, v12 563; GFX12-NEXT: s_setpc_b64 s[30:31] 564; 565; GFX9-LABEL: v_load_constant_v12i8_align8: 566; GFX9: ; %bb.0: 567; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 568; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off 569; GFX9-NEXT: s_waitcnt vmcnt(0) 570; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v0 571; GFX9-NEXT: v_lshrrev_b32_e32 v12, 16, v0 572; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v0 573; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1 574; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 575; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 576; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2 577; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2 578; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2 579; GFX9-NEXT: v_mov_b32_e32 v4, v1 580; GFX9-NEXT: v_mov_b32_e32 v8, v2 581; GFX9-NEXT: v_mov_b32_e32 v1, v13 582; GFX9-NEXT: v_mov_b32_e32 v2, v12 583; GFX9-NEXT: s_setpc_b64 s[30:31] 584; 585; GFX7-LABEL: v_load_constant_v12i8_align8: 586; GFX7: ; %bb.0: 587; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 588; GFX7-NEXT: s_mov_b32 s6, 0 589; GFX7-NEXT: s_mov_b32 s7, 0xf000 590; GFX7-NEXT: s_mov_b64 s[4:5], 0 591; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 592; GFX7-NEXT: s_waitcnt vmcnt(0) 
593; GFX7-NEXT: v_lshrrev_b32_e32 v13, 8, v0 594; GFX7-NEXT: v_lshrrev_b32_e32 v12, 16, v0 595; GFX7-NEXT: v_lshrrev_b32_e32 v3, 24, v0 596; GFX7-NEXT: v_lshrrev_b32_e32 v5, 8, v1 597; GFX7-NEXT: v_lshrrev_b32_e32 v6, 16, v1 598; GFX7-NEXT: v_lshrrev_b32_e32 v7, 24, v1 599; GFX7-NEXT: v_lshrrev_b32_e32 v9, 8, v2 600; GFX7-NEXT: v_lshrrev_b32_e32 v10, 16, v2 601; GFX7-NEXT: v_lshrrev_b32_e32 v11, 24, v2 602; GFX7-NEXT: v_mov_b32_e32 v4, v1 603; GFX7-NEXT: v_mov_b32_e32 v8, v2 604; GFX7-NEXT: v_mov_b32_e32 v1, v13 605; GFX7-NEXT: v_mov_b32_e32 v2, v12 606; GFX7-NEXT: s_setpc_b64 s[30:31] 607; 608; GFX6-LABEL: v_load_constant_v12i8_align8: 609; GFX6: ; %bb.0: 610; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 611; GFX6-NEXT: s_mov_b32 s6, 0 612; GFX6-NEXT: s_mov_b32 s7, 0xf000 613; GFX6-NEXT: s_mov_b64 s[4:5], 0 614; GFX6-NEXT: buffer_load_dwordx2 v[12:13], v[0:1], s[4:7], 0 addr64 615; GFX6-NEXT: buffer_load_dword v8, v[0:1], s[4:7], 0 addr64 offset:8 616; GFX6-NEXT: s_waitcnt vmcnt(1) 617; GFX6-NEXT: v_lshrrev_b32_e32 v1, 8, v12 618; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v12 619; GFX6-NEXT: v_lshrrev_b32_e32 v3, 24, v12 620; GFX6-NEXT: v_lshrrev_b32_e32 v5, 8, v13 621; GFX6-NEXT: v_lshrrev_b32_e32 v6, 16, v13 622; GFX6-NEXT: v_lshrrev_b32_e32 v7, 24, v13 623; GFX6-NEXT: s_waitcnt vmcnt(0) 624; GFX6-NEXT: v_lshrrev_b32_e32 v9, 8, v8 625; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8 626; GFX6-NEXT: v_lshrrev_b32_e32 v11, 24, v8 627; GFX6-NEXT: v_mov_b32_e32 v0, v12 628; GFX6-NEXT: v_mov_b32_e32 v4, v13 629; GFX6-NEXT: s_setpc_b64 s[30:31] 630 %load = load <12 x i8>, ptr addrspace(4) %ptr, align 8 631 ret <12 x i8> %load 632} 633 634define <3 x i32> @v_load_constant_v3i32_align16(ptr addrspace(4) %ptr) { 635; GFX12-LABEL: v_load_constant_v3i32_align16: 636; GFX12: ; %bb.0: 637; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 638; GFX12-NEXT: s_wait_expcnt 0x0 639; GFX12-NEXT: s_wait_samplecnt 0x0 640; GFX12-NEXT: s_wait_bvhcnt 0x0 641; GFX12-NEXT: s_wait_kmcnt 0x0 642; 
GFX12-NEXT: global_load_b96 v[0:2], v[0:1], off 643; GFX12-NEXT: s_wait_loadcnt 0x0 644; GFX12-NEXT: s_setpc_b64 s[30:31] 645; 646; GFX9-LABEL: v_load_constant_v3i32_align16: 647; GFX9: ; %bb.0: 648; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 649; GFX9-NEXT: global_load_dwordx3 v[0:2], v[0:1], off 650; GFX9-NEXT: s_waitcnt vmcnt(0) 651; GFX9-NEXT: s_setpc_b64 s[30:31] 652; 653; GFX7-LABEL: v_load_constant_v3i32_align16: 654; GFX7: ; %bb.0: 655; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 656; GFX7-NEXT: s_mov_b32 s6, 0 657; GFX7-NEXT: s_mov_b32 s7, 0xf000 658; GFX7-NEXT: s_mov_b64 s[4:5], 0 659; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 660; GFX7-NEXT: s_waitcnt vmcnt(0) 661; GFX7-NEXT: s_setpc_b64 s[30:31] 662; 663; GFX6-LABEL: v_load_constant_v3i32_align16: 664; GFX6: ; %bb.0: 665; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 666; GFX6-NEXT: s_mov_b32 s6, 0 667; GFX6-NEXT: s_mov_b32 s7, 0xf000 668; GFX6-NEXT: s_mov_b64 s[4:5], 0 669; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[4:7], 0 addr64 670; GFX6-NEXT: s_waitcnt vmcnt(0) 671; GFX6-NEXT: s_setpc_b64 s[30:31] 672 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 16 673 ret <3 x i32> %load 674} 675 676define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg %ptr) { 677; GFX12-UNALIGNED-LABEL: s_load_constant_v3i32_align1: 678; GFX12-UNALIGNED: ; %bb.0: 679; GFX12-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 680; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v0, s[0:1] 681; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0 682; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 683; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 684; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 685; GFX12-UNALIGNED-NEXT: ; return to shader part epilog 686; 687; GFX12-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1: 688; GFX12-NOUNALIGNED: ; %bb.0: 689; GFX12-NOUNALIGNED-NEXT: s_clause 0xb 690; GFX12-NOUNALIGNED-NEXT: s_load_u8 s2, s[0:1], 0x1 691; 
GFX12-NOUNALIGNED-NEXT: s_load_u8 s3, s[0:1], 0x3 692; GFX12-NOUNALIGNED-NEXT: s_load_u8 s4, s[0:1], 0x2 693; GFX12-NOUNALIGNED-NEXT: s_load_u8 s5, s[0:1], 0x5 694; GFX12-NOUNALIGNED-NEXT: s_load_u8 s6, s[0:1], 0x7 695; GFX12-NOUNALIGNED-NEXT: s_load_u8 s7, s[0:1], 0x6 696; GFX12-NOUNALIGNED-NEXT: s_load_u8 s8, s[0:1], 0x9 697; GFX12-NOUNALIGNED-NEXT: s_load_u8 s9, s[0:1], 0xb 698; GFX12-NOUNALIGNED-NEXT: s_load_u8 s10, s[0:1], 0x0 699; GFX12-NOUNALIGNED-NEXT: s_load_u8 s11, s[0:1], 0x4 700; GFX12-NOUNALIGNED-NEXT: s_load_u8 s12, s[0:1], 0xa 701; GFX12-NOUNALIGNED-NEXT: s_load_u8 s1, s[0:1], 0x8 702; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 703; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s0, s2, 8 704; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s2, s3, 24 705; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s3, s4, 16 706; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s4, s5, 8 707; GFX12-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s3 708; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s5, s6, 24 709; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s6, s7, 16 710; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s7, s8, 8 711; GFX12-NOUNALIGNED-NEXT: s_or_b32 s0, s0, s10 712; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s8, s9, 24 713; GFX12-NOUNALIGNED-NEXT: s_or_b32 s0, s2, s0 714; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s2, s12, 16 715; GFX12-NOUNALIGNED-NEXT: s_or_b32 s3, s4, s11 716; GFX12-NOUNALIGNED-NEXT: s_or_b32 s4, s5, s6 717; GFX12-NOUNALIGNED-NEXT: s_or_b32 s5, s7, s1 718; GFX12-NOUNALIGNED-NEXT: s_or_b32 s2, s8, s2 719; GFX12-NOUNALIGNED-NEXT: s_or_b32 s1, s4, s3 720; GFX12-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s5 721; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog 722; 723; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align1: 724; GFX9-UNALIGNED: ; %bb.0: 725; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 726; GFX9-UNALIGNED-NEXT: global_load_dwordx3 v[0:2], v0, s[0:1] 727; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) 728; GFX9-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 729; GFX9-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 730; GFX9-UNALIGNED-NEXT: 
v_readfirstlane_b32 s2, v2 731; GFX9-UNALIGNED-NEXT: ; return to shader part epilog 732; 733; GFX9-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1: 734; GFX9-NOUNALIGNED: ; %bb.0: 735; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, 0 736; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v1, v0, s[0:1] 737; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v2, v0, s[0:1] offset:1 738; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v3, v0, s[0:1] offset:2 739; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v4, v0, s[0:1] offset:3 740; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v5, v0, s[0:1] offset:4 741; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v6, v0, s[0:1] offset:5 742; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v7, v0, s[0:1] offset:6 743; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v8, v0, s[0:1] offset:7 744; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v9, v0, s[0:1] offset:8 745; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v10, v0, s[0:1] offset:9 746; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v11, v0, s[0:1] offset:11 747; GFX9-NOUNALIGNED-NEXT: global_load_ubyte v12, v0, s[0:1] offset:10 748; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) 749; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v2, 8, v1 750; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) 751; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v3 752; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8) 753; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 24, v4 754; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v0, v1, v2, v0 755; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) 756; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v3, v6, 8, v5 757; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) 758; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v7 759; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) 760; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 24, v8 761; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v1, v4, v5, v3 762; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) 763; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v6, v10, 8, v9 764; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) 765; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, 
v11 766; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) 767; GFX9-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 16, v12 768; GFX9-NOUNALIGNED-NEXT: v_or3_b32 v2, v7, v8, v6 769; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 770; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 771; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 772; GFX9-NOUNALIGNED-NEXT: ; return to shader part epilog 773; 774; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align1: 775; GFX7-UNALIGNED: ; %bb.0: 776; GFX7-UNALIGNED-NEXT: s_mov_b32 s2, -1 777; GFX7-UNALIGNED-NEXT: s_mov_b32 s3, 0xf000 778; GFX7-UNALIGNED-NEXT: buffer_load_dwordx3 v[0:2], off, s[0:3], 0 779; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) 780; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 781; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 782; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 783; GFX7-UNALIGNED-NEXT: ; return to shader part epilog 784; 785; GFX7-NOUNALIGNED-LABEL: s_load_constant_v3i32_align1: 786; GFX7-NOUNALIGNED: ; %bb.0: 787; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s2, -1 788; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s3, 0xf000 789; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:1 790; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 offset:3 791; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v2, off, s[0:3], 0 offset:2 792; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v3, off, s[0:3], 0 offset:5 793; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v4, off, s[0:3], 0 offset:7 794; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v5, off, s[0:3], 0 offset:6 795; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v6, off, s[0:3], 0 offset:9 796; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v7, off, s[0:3], 0 offset:11 797; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v8, off, s[0:3], 0 offset:10 798; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v9, off, s[0:3], 0 799; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v10, off, s[0:3], 0 offset:4 800; GFX7-NOUNALIGNED-NEXT: buffer_load_ubyte v11, off, s[0:3], 0 offset:8 801; GFX7-NOUNALIGNED-NEXT: 
s_waitcnt vmcnt(11) 802; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 8, v0 803; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(10) 804; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 24, v1 805; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(9) 806; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 807; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(8) 808; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v3, 8, v3 809; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(7) 810; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v4, 24, v4 811; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(6) 812; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v5, 16, v5 813; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) 814; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v6, 8, v6 815; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) 816; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v7, 24, v7 817; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) 818; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v8, 16, v8 819; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) 820; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v9 821; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v2 822; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) 823; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v3, v10 824; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v3, v4, v5 825; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) 826; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v4, v6, v11 827; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v5, v7, v8 828; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v1, v0 829; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v3, v2 830; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v5, v4 831; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 832; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 833; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 834; GFX7-NOUNALIGNED-NEXT: ; return to shader part epilog 835; 836; GFX6-LABEL: s_load_constant_v3i32_align1: 837; GFX6: ; %bb.0: 838; GFX6-NEXT: s_mov_b32 s2, -1 839; GFX6-NEXT: s_mov_b32 s3, 0xf000 840; GFX6-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:1 841; GFX6-NEXT: buffer_load_ubyte v1, off, s[0:3], 0 
offset:3 842; GFX6-NEXT: buffer_load_ubyte v2, off, s[0:3], 0 offset:2 843; GFX6-NEXT: buffer_load_ubyte v3, off, s[0:3], 0 offset:5 844; GFX6-NEXT: buffer_load_ubyte v4, off, s[0:3], 0 offset:7 845; GFX6-NEXT: buffer_load_ubyte v5, off, s[0:3], 0 offset:6 846; GFX6-NEXT: buffer_load_ubyte v6, off, s[0:3], 0 offset:9 847; GFX6-NEXT: buffer_load_ubyte v7, off, s[0:3], 0 offset:11 848; GFX6-NEXT: buffer_load_ubyte v8, off, s[0:3], 0 offset:10 849; GFX6-NEXT: buffer_load_ubyte v9, off, s[0:3], 0 850; GFX6-NEXT: buffer_load_ubyte v10, off, s[0:3], 0 offset:4 851; GFX6-NEXT: buffer_load_ubyte v11, off, s[0:3], 0 offset:8 852; GFX6-NEXT: s_waitcnt vmcnt(11) 853; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0 854; GFX6-NEXT: s_waitcnt vmcnt(10) 855; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 856; GFX6-NEXT: s_waitcnt vmcnt(9) 857; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 858; GFX6-NEXT: s_waitcnt vmcnt(8) 859; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 860; GFX6-NEXT: s_waitcnt vmcnt(7) 861; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v4 862; GFX6-NEXT: s_waitcnt vmcnt(6) 863; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 864; GFX6-NEXT: s_waitcnt vmcnt(5) 865; GFX6-NEXT: v_lshlrev_b32_e32 v6, 8, v6 866; GFX6-NEXT: s_waitcnt vmcnt(4) 867; GFX6-NEXT: v_lshlrev_b32_e32 v7, 24, v7 868; GFX6-NEXT: s_waitcnt vmcnt(3) 869; GFX6-NEXT: v_lshlrev_b32_e32 v8, 16, v8 870; GFX6-NEXT: s_waitcnt vmcnt(2) 871; GFX6-NEXT: v_or_b32_e32 v0, v0, v9 872; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 873; GFX6-NEXT: s_waitcnt vmcnt(1) 874; GFX6-NEXT: v_or_b32_e32 v2, v3, v10 875; GFX6-NEXT: v_or_b32_e32 v3, v4, v5 876; GFX6-NEXT: s_waitcnt vmcnt(0) 877; GFX6-NEXT: v_or_b32_e32 v4, v6, v11 878; GFX6-NEXT: v_or_b32_e32 v5, v7, v8 879; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 880; GFX6-NEXT: v_or_b32_e32 v1, v3, v2 881; GFX6-NEXT: v_or_b32_e32 v2, v5, v4 882; GFX6-NEXT: v_readfirstlane_b32 s0, v0 883; GFX6-NEXT: v_readfirstlane_b32 s1, v1 884; GFX6-NEXT: v_readfirstlane_b32 s2, v2 885; GFX6-NEXT: ; return to shader part epilog 886 %load = 
load <3 x i32>, ptr addrspace(4) %ptr, align 1 887 ret <3 x i32> %load 888} 889; Uniform (inreg) pointer with align 2 - when unaligned access is disabled the checks expect six 16-bit loads, each pair merged with a 16-bit shift and an or. 890define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg %ptr) { 891; GFX12-UNALIGNED-LABEL: s_load_constant_v3i32_align2: 892; GFX12-UNALIGNED: ; %bb.0: 893; GFX12-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 894; GFX12-UNALIGNED-NEXT: global_load_b96 v[0:2], v0, s[0:1] 895; GFX12-UNALIGNED-NEXT: s_wait_loadcnt 0x0 896; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 897; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 898; GFX12-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 899; GFX12-UNALIGNED-NEXT: ; return to shader part epilog 900; 901; GFX12-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2: 902; GFX12-NOUNALIGNED: ; %bb.0: 903; GFX12-NOUNALIGNED-NEXT: s_clause 0x5 904; GFX12-NOUNALIGNED-NEXT: s_load_u16 s2, s[0:1], 0x2 905; GFX12-NOUNALIGNED-NEXT: s_load_u16 s3, s[0:1], 0x6 906; GFX12-NOUNALIGNED-NEXT: s_load_u16 s4, s[0:1], 0xa 907; GFX12-NOUNALIGNED-NEXT: s_load_u16 s5, s[0:1], 0x0 908; GFX12-NOUNALIGNED-NEXT: s_load_u16 s6, s[0:1], 0x4 909; GFX12-NOUNALIGNED-NEXT: s_load_u16 s7, s[0:1], 0x8 910; GFX12-NOUNALIGNED-NEXT: s_wait_kmcnt 0x0 911; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s0, s2, 16 912; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s1, s3, 16 913; GFX12-NOUNALIGNED-NEXT: s_lshl_b32 s2, s4, 16 914; GFX12-NOUNALIGNED-NEXT: s_or_b32 s0, s0, s5 915; GFX12-NOUNALIGNED-NEXT: s_or_b32 s1, s1, s6 916; GFX12-NOUNALIGNED-NEXT: s_or_b32 s2, s2, s7 917; GFX12-NOUNALIGNED-NEXT: ; return to shader part epilog 918; 919; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align2: 920; GFX9-UNALIGNED: ; %bb.0: 921; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 922; GFX9-UNALIGNED-NEXT: global_load_dwordx3 v[0:2], v0, s[0:1] 923; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) 924; GFX9-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 925; GFX9-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 926; GFX9-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 927; GFX9-UNALIGNED-NEXT: ; return to shader part epilog 928; 929; 
GFX9-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2: 930; GFX9-NOUNALIGNED: ; %bb.0: 931; GFX9-NOUNALIGNED-NEXT: v_mov_b32_e32 v0, 0 932; GFX9-NOUNALIGNED-NEXT: global_load_ushort v1, v0, s[0:1] 933; GFX9-NOUNALIGNED-NEXT: global_load_ushort v2, v0, s[0:1] offset:2 934; GFX9-NOUNALIGNED-NEXT: global_load_ushort v3, v0, s[0:1] offset:4 935; GFX9-NOUNALIGNED-NEXT: global_load_ushort v4, v0, s[0:1] offset:6 936; GFX9-NOUNALIGNED-NEXT: global_load_ushort v5, v0, s[0:1] offset:8 937; GFX9-NOUNALIGNED-NEXT: global_load_ushort v6, v0, s[0:1] offset:10 938; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) 939; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v0, v2, 16, v1 940; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 941; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) 942; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v1, v4, 16, v3 943; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 944; GFX9-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) 945; GFX9-NOUNALIGNED-NEXT: v_lshl_or_b32 v2, v6, 16, v5 946; GFX9-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 947; GFX9-NOUNALIGNED-NEXT: ; return to shader part epilog 948; 949; GFX7-UNALIGNED-LABEL: s_load_constant_v3i32_align2: 950; GFX7-UNALIGNED: ; %bb.0: 951; GFX7-UNALIGNED-NEXT: s_mov_b32 s2, -1 952; GFX7-UNALIGNED-NEXT: s_mov_b32 s3, 0xf000 953; GFX7-UNALIGNED-NEXT: buffer_load_dwordx3 v[0:2], off, s[0:3], 0 954; GFX7-UNALIGNED-NEXT: s_waitcnt vmcnt(0) 955; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 956; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 957; GFX7-UNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 958; GFX7-UNALIGNED-NEXT: ; return to shader part epilog 959; 960; GFX7-NOUNALIGNED-LABEL: s_load_constant_v3i32_align2: 961; GFX7-NOUNALIGNED: ; %bb.0: 962; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s2, -1 963; GFX7-NOUNALIGNED-NEXT: s_mov_b32 s3, 0xf000 964; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:2 965; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:6 966; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v2, off, 
s[0:3], 0 offset:10 967; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v3, off, s[0:3], 0 968; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4 969; GFX7-NOUNALIGNED-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8 970; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(5) 971; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v0, 16, v0 972; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(4) 973; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v1, 16, v1 974; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(3) 975; GFX7-NOUNALIGNED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 976; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(2) 977; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v0, v0, v3 978; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(1) 979; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v1, v4 980; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) 981; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v2, v5 982; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s0, v0 983; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 984; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 985; GFX7-NOUNALIGNED-NEXT: ; return to shader part epilog 986; 987; GFX6-LABEL: s_load_constant_v3i32_align2: 988; GFX6: ; %bb.0: 989; GFX6-NEXT: s_mov_b32 s2, -1 990; GFX6-NEXT: s_mov_b32 s3, 0xf000 991; GFX6-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:2 992; GFX6-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:6 993; GFX6-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:10 994; GFX6-NEXT: buffer_load_ushort v3, off, s[0:3], 0 995; GFX6-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4 996; GFX6-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8 997; GFX6-NEXT: s_waitcnt vmcnt(5) 998; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 999; GFX6-NEXT: s_waitcnt vmcnt(4) 1000; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1001; GFX6-NEXT: s_waitcnt vmcnt(3) 1002; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1003; GFX6-NEXT: s_waitcnt vmcnt(2) 1004; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 1005; GFX6-NEXT: s_waitcnt vmcnt(1) 1006; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 1007; GFX6-NEXT: 
s_waitcnt vmcnt(0) 1008; GFX6-NEXT: v_or_b32_e32 v2, v2, v5 1009; GFX6-NEXT: v_readfirstlane_b32 s0, v0 1010; GFX6-NEXT: v_readfirstlane_b32 s1, v1 1011; GFX6-NEXT: v_readfirstlane_b32 s2, v2 1012; GFX6-NEXT: ; return to shader part epilog 1013 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 2 1014 ret <3 x i32> %load 1015} 1016; Uniform pointer with align 4 - the checks expect scalar memory loads, a single s_load_b96 on gfx1200 and an s_load_dwordx2 plus s_load_dword pair on gfx900, hawaii and tahiti. 1017define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg %ptr) { 1018; GFX12-LABEL: s_load_constant_v3i32_align4: 1019; GFX12: ; %bb.0: 1020; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 1021; GFX12-NEXT: s_wait_kmcnt 0x0 1022; GFX12-NEXT: ; return to shader part epilog 1023; 1024; GFX9-LABEL: s_load_constant_v3i32_align4: 1025; GFX9: ; %bb.0: 1026; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1027; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 1028; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1029; GFX9-NEXT: s_mov_b32 s0, s4 1030; GFX9-NEXT: s_mov_b32 s1, s5 1031; GFX9-NEXT: ; return to shader part epilog 1032; 1033; GFX7-LABEL: s_load_constant_v3i32_align4: 1034; GFX7: ; %bb.0: 1035; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1036; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 1037; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1038; GFX7-NEXT: s_mov_b32 s0, s4 1039; GFX7-NEXT: s_mov_b32 s1, s5 1040; GFX7-NEXT: ; return to shader part epilog 1041; 1042; GFX6-LABEL: s_load_constant_v3i32_align4: 1043; GFX6: ; %bb.0: 1044; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1045; GFX6-NEXT: s_load_dword s2, s[0:1], 0x2 1046; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1047; GFX6-NEXT: s_mov_b32 s0, s4 1048; GFX6-NEXT: s_mov_b32 s1, s5 1049; GFX6-NEXT: ; return to shader part epilog 1050 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 4 1051 ret <3 x i32> %load 1052} 1053; Scalar i96 load with align 8 - the checks expect the same scalar load sequence as the <3 x i32> align 4 case. 1054define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) { 1055; GFX12-LABEL: s_load_constant_i96_align8: 1056; GFX12: ; %bb.0: 1057; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 1058; GFX12-NEXT: s_wait_kmcnt 0x0 1059; GFX12-NEXT: ; return to shader part epilog 
1060; 1061; GFX9-LABEL: s_load_constant_i96_align8: 1062; GFX9: ; %bb.0: 1063; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1064; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 1065; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1066; GFX9-NEXT: s_mov_b32 s0, s4 1067; GFX9-NEXT: s_mov_b32 s1, s5 1068; GFX9-NEXT: ; return to shader part epilog 1069; 1070; GFX7-LABEL: s_load_constant_i96_align8: 1071; GFX7: ; %bb.0: 1072; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1073; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 1074; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1075; GFX7-NEXT: s_mov_b32 s0, s4 1076; GFX7-NEXT: s_mov_b32 s1, s5 1077; GFX7-NEXT: ; return to shader part epilog 1078; 1079; GFX6-LABEL: s_load_constant_i96_align8: 1080; GFX6: ; %bb.0: 1081; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1082; GFX6-NEXT: s_load_dword s2, s[0:1], 0x2 1083; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1084; GFX6-NEXT: s_mov_b32 s0, s4 1085; GFX6-NEXT: s_mov_b32 s1, s5 1086; GFX6-NEXT: ; return to shader part epilog 1087 %load = load i96, ptr addrspace(4) %ptr, align 8 1088 ret i96 %load 1089} 1090; <3 x i32> with align 8 - the checks still expect s_load_dwordx2 plus s_load_dword on gfx900, hawaii and tahiti, and s_load_b96 on gfx1200. 1091define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg %ptr) { 1092; GFX12-LABEL: s_load_constant_v3i32_align8: 1093; GFX12: ; %bb.0: 1094; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 1095; GFX12-NEXT: s_wait_kmcnt 0x0 1096; GFX12-NEXT: ; return to shader part epilog 1097; 1098; GFX9-LABEL: s_load_constant_v3i32_align8: 1099; GFX9: ; %bb.0: 1100; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1101; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 1102; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1103; GFX9-NEXT: s_mov_b32 s0, s4 1104; GFX9-NEXT: s_mov_b32 s1, s5 1105; GFX9-NEXT: ; return to shader part epilog 1106; 1107; GFX7-LABEL: s_load_constant_v3i32_align8: 1108; GFX7: ; %bb.0: 1109; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1110; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 1111; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1112; GFX7-NEXT: s_mov_b32 s0, s4 1113; GFX7-NEXT: s_mov_b32 s1, s5 1114; GFX7-NEXT: ; return to shader 
part epilog 1115; 1116; GFX6-LABEL: s_load_constant_v3i32_align8: 1117; GFX6: ; %bb.0: 1118; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1119; GFX6-NEXT: s_load_dword s2, s[0:1], 0x2 1120; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1121; GFX6-NEXT: s_mov_b32 s0, s4 1122; GFX6-NEXT: s_mov_b32 s1, s5 1123; GFX6-NEXT: ; return to shader part epilog 1124 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 8 1125 ret <3 x i32> %load 1126} 1127; <6 x i16> loaded with align 8 and bitcast to <3 x i32> - the checks expect the same code as the <3 x i32> align 8 load. 1128define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg %ptr) { 1129; GFX12-LABEL: s_load_constant_v6i16_align8: 1130; GFX12: ; %bb.0: 1131; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 1132; GFX12-NEXT: s_wait_kmcnt 0x0 1133; GFX12-NEXT: ; return to shader part epilog 1134; 1135; GFX9-LABEL: s_load_constant_v6i16_align8: 1136; GFX9: ; %bb.0: 1137; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1138; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 1139; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1140; GFX9-NEXT: s_mov_b32 s0, s4 1141; GFX9-NEXT: s_mov_b32 s1, s5 1142; GFX9-NEXT: ; return to shader part epilog 1143; 1144; GFX7-LABEL: s_load_constant_v6i16_align8: 1145; GFX7: ; %bb.0: 1146; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1147; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 1148; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1149; GFX7-NEXT: s_mov_b32 s0, s4 1150; GFX7-NEXT: s_mov_b32 s1, s5 1151; GFX7-NEXT: ; return to shader part epilog 1152; 1153; GFX6-LABEL: s_load_constant_v6i16_align8: 1154; GFX6: ; %bb.0: 1155; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1156; GFX6-NEXT: s_load_dword s2, s[0:1], 0x2 1157; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1158; GFX6-NEXT: s_mov_b32 s0, s4 1159; GFX6-NEXT: s_mov_b32 s1, s5 1160; GFX6-NEXT: ; return to shader part epilog 1161 %load = load <6 x i16>, ptr addrspace(4) %ptr, align 8 1162 %cast = bitcast <6 x i16> %load to <3 x i32> 1163 ret <3 x i32> %cast 1164} 1165; <12 x i8> with align 8 - after the scalar load the checks expect s_lshr_b32 by 8, 16 and 24 on each loaded dword, placing the upper bytes in separate SGPRs. 1166define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(ptr addrspace(4) inreg %ptr) { 1167; GFX12-LABEL: s_load_constant_v12i8_align8: 
1168; GFX12: ; %bb.0: 1169; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 1170; GFX12-NEXT: s_wait_kmcnt 0x0 1171; GFX12-NEXT: s_lshr_b32 s13, s0, 8 1172; GFX12-NEXT: s_lshr_b32 s12, s0, 16 1173; GFX12-NEXT: s_lshr_b32 s3, s0, 24 1174; GFX12-NEXT: s_lshr_b32 s5, s1, 8 1175; GFX12-NEXT: s_lshr_b32 s6, s1, 16 1176; GFX12-NEXT: s_lshr_b32 s7, s1, 24 1177; GFX12-NEXT: s_lshr_b32 s9, s2, 8 1178; GFX12-NEXT: s_lshr_b32 s10, s2, 16 1179; GFX12-NEXT: s_lshr_b32 s11, s2, 24 1180; GFX12-NEXT: s_mov_b32 s4, s1 1181; GFX12-NEXT: s_mov_b32 s8, s2 1182; GFX12-NEXT: s_mov_b32 s1, s13 1183; GFX12-NEXT: s_mov_b32 s2, s12 1184; GFX12-NEXT: ; return to shader part epilog 1185; 1186; GFX9-LABEL: s_load_constant_v12i8_align8: 1187; GFX9: ; %bb.0: 1188; GFX9-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0 1189; GFX9-NEXT: s_load_dword s8, s[0:1], 0x8 1190; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1191; GFX9-NEXT: s_lshr_b32 s1, s12, 8 1192; GFX9-NEXT: s_lshr_b32 s2, s12, 16 1193; GFX9-NEXT: s_lshr_b32 s3, s12, 24 1194; GFX9-NEXT: s_lshr_b32 s5, s13, 8 1195; GFX9-NEXT: s_lshr_b32 s6, s13, 16 1196; GFX9-NEXT: s_lshr_b32 s7, s13, 24 1197; GFX9-NEXT: s_lshr_b32 s9, s8, 8 1198; GFX9-NEXT: s_lshr_b32 s10, s8, 16 1199; GFX9-NEXT: s_lshr_b32 s11, s8, 24 1200; GFX9-NEXT: s_mov_b32 s0, s12 1201; GFX9-NEXT: s_mov_b32 s4, s13 1202; GFX9-NEXT: ; return to shader part epilog 1203; 1204; GFX7-LABEL: s_load_constant_v12i8_align8: 1205; GFX7: ; %bb.0: 1206; GFX7-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0 1207; GFX7-NEXT: s_load_dword s8, s[0:1], 0x2 1208; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1209; GFX7-NEXT: s_lshr_b32 s1, s12, 8 1210; GFX7-NEXT: s_lshr_b32 s2, s12, 16 1211; GFX7-NEXT: s_lshr_b32 s3, s12, 24 1212; GFX7-NEXT: s_lshr_b32 s5, s13, 8 1213; GFX7-NEXT: s_lshr_b32 s6, s13, 16 1214; GFX7-NEXT: s_lshr_b32 s7, s13, 24 1215; GFX7-NEXT: s_lshr_b32 s9, s8, 8 1216; GFX7-NEXT: s_lshr_b32 s10, s8, 16 1217; GFX7-NEXT: s_lshr_b32 s11, s8, 24 1218; GFX7-NEXT: s_mov_b32 s0, s12 1219; GFX7-NEXT: s_mov_b32 s4, s13 1220; 
GFX7-NEXT: ; return to shader part epilog 1221; 1222; GFX6-LABEL: s_load_constant_v12i8_align8: 1223; GFX6: ; %bb.0: 1224; GFX6-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0 1225; GFX6-NEXT: s_load_dword s8, s[0:1], 0x2 1226; GFX6-NEXT: s_waitcnt lgkmcnt(0) 1227; GFX6-NEXT: s_lshr_b32 s1, s12, 8 1228; GFX6-NEXT: s_lshr_b32 s2, s12, 16 1229; GFX6-NEXT: s_lshr_b32 s3, s12, 24 1230; GFX6-NEXT: s_lshr_b32 s5, s13, 8 1231; GFX6-NEXT: s_lshr_b32 s6, s13, 16 1232; GFX6-NEXT: s_lshr_b32 s7, s13, 24 1233; GFX6-NEXT: s_lshr_b32 s9, s8, 8 1234; GFX6-NEXT: s_lshr_b32 s10, s8, 16 1235; GFX6-NEXT: s_lshr_b32 s11, s8, 24 1236; GFX6-NEXT: s_mov_b32 s0, s12 1237; GFX6-NEXT: s_mov_b32 s4, s13 1238; GFX6-NEXT: ; return to shader part epilog 1239 %load = load <12 x i8>, ptr addrspace(4) %ptr, align 8 1240 ret <12 x i8> %load 1241} 1242; <3 x i32> with align 16 - gfx900, hawaii and tahiti are expected to use a single s_load_dwordx4 here, while gfx1200 keeps s_load_b96. 1243define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(ptr addrspace(4) inreg %ptr) { 1244; GFX12-LABEL: s_load_constant_v3i32_align16: 1245; GFX12: ; %bb.0: 1246; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0 1247; GFX12-NEXT: s_wait_kmcnt 0x0 1248; GFX12-NEXT: ; return to shader part epilog 1249; 1250; GCN-LABEL: s_load_constant_v3i32_align16: 1251; GCN: ; %bb.0: 1252; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 1253; GCN-NEXT: s_waitcnt lgkmcnt(0) 1254; GCN-NEXT: ; return to shader part epilog 1255 %load = load <3 x i32>, ptr addrspace(4) %ptr, align 16 1256 ret <3 x i32> %load 1257} 1258