; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=-unaligned-access-mode < %s | FileCheck --check-prefix=GFX7-ALIGNED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -mattr=+unaligned-access-mode < %s | FileCheck --check-prefix=GFX7-UNALIGNED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+unaligned-access-mode < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+unaligned-access-mode < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+unaligned-access-mode < %s | FileCheck --check-prefix=GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+unaligned-access-mode < %s | FileCheck --check-prefix=GFX12 %s

; Each test below issues two adjacent i16 global accesses (offsets 0 and 2)
; with a given alignment and records whether they are combined into a single
; 32-bit access. hawaii is run with unaligned access mode both disabled and
; enabled; all other targets are run with it enabled only.

; Two adjacent i16 loads that are only 2-byte aligned.
; With unaligned access mode disabled they stay as two 16-bit loads; with it
; enabled they are merged into a single dword load.
define i32 @global_load_2xi16_align2(ptr addrspace(1) %p) #0 {
; GFX7-ALIGNED-LABEL: global_load_2xi16_align2:
; GFX7-ALIGNED:       ; %bb.0:
; GFX7-ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-ALIGNED-NEXT:    v_add_i32_e32 v2, vcc, 2, v0
; GFX7-ALIGNED-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
; GFX7-ALIGNED-NEXT:    flat_load_ushort v2, v[2:3]
; GFX7-ALIGNED-NEXT:    flat_load_ushort v0, v[0:1]
; GFX7-ALIGNED-NEXT:    s_waitcnt vmcnt(1)
; GFX7-ALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
; GFX7-ALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-ALIGNED-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-ALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-UNALIGNED-LABEL: global_load_2xi16_align2:
; GFX7-UNALIGNED:       ; %bb.0:
; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-UNALIGNED-NEXT:    flat_load_dword v0, v[0:1]
; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-UNALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: global_load_2xi16_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_load_2xi16_align2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_load_2xi16_align2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_load_2xi16_align2:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_load_b32 v0, v[0:1], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %gep.p = getelementptr i16, ptr addrspace(1) %p, i64 1
  %p.0 = load i16, ptr addrspace(1) %p, align 2
  %p.1 = load i16, ptr addrspace(1) %gep.p, align 2
  %zext.0 = zext i16 %p.0 to i32
  %zext.1 = zext i16 %p.1 to i32
  %shl.1 = shl i32 %zext.1, 16
  %or = or i32 %zext.0, %shl.1
  ret i32 %or
}

; Two adjacent i16 stores (constants 1 and 2) that are only 2-byte aligned.
; With unaligned access mode disabled they stay as two 16-bit stores; with it
; enabled they are merged into a single dword store of 0x20001.
; The stores go through %r; %p is unused.
define amdgpu_kernel void @global_store_2xi16_align2(ptr addrspace(1) %p, ptr addrspace(1) %r) #0 {
; GFX7-ALIGNED-LABEL: global_store_2xi16_align2:
; GFX7-ALIGNED:       ; %bb.0:
; GFX7-ALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x2
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v2, 1
; GFX7-ALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-ALIGNED-NEXT:    s_add_u32 s2, s0, 2
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-ALIGNED-NEXT:    s_addc_u32 s3, s1, 0
; GFX7-ALIGNED-NEXT:    flat_store_short v[0:1], v2
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v2, 2
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-ALIGNED-NEXT:    flat_store_short v[0:1], v2
; GFX7-ALIGNED-NEXT:    s_endpgm
;
; GFX7-UNALIGNED-LABEL: global_store_2xi16_align2:
; GFX7-UNALIGNED:       ; %bb.0:
; GFX7-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x2
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v2, 0x20001
; GFX7-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-UNALIGNED-NEXT:    flat_store_dword v[0:1], v2
; GFX7-UNALIGNED-NEXT:    s_endpgm
;
; GFX9-LABEL: global_store_2xi16_align2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x20001
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_store_2xi16_align2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x20001
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_2xi16_align2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x20001
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: global_store_2xi16_align2:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x20001
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
  %gep.r = getelementptr i16, ptr addrspace(1) %r, i64 1
  store i16 1, ptr addrspace(1) %r, align 2
  store i16 2, ptr addrspace(1) %gep.r, align 2
  ret void
}

; Two adjacent i16 loads that are only 1-byte aligned.
; Should produce an align 1 dword load when legal: with unaligned access mode
; disabled the loads are split into four byte loads, with it enabled they
; become a single dword load.
define i32 @global_load_2xi16_align1(ptr addrspace(1) %p) #0 {
; GFX7-ALIGNED-LABEL: global_load_2xi16_align1:
; GFX7-ALIGNED:       ; %bb.0:
; GFX7-ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-ALIGNED-NEXT:    v_add_i32_e32 v2, vcc, 2, v0
; GFX7-ALIGNED-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
; GFX7-ALIGNED-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
; GFX7-ALIGNED-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
; GFX7-ALIGNED-NEXT:    v_add_i32_e32 v6, vcc, 3, v0
; GFX7-ALIGNED-NEXT:    v_addc_u32_e32 v7, vcc, 0, v1, vcc
; GFX7-ALIGNED-NEXT:    flat_load_ubyte v6, v[6:7]
; GFX7-ALIGNED-NEXT:    flat_load_ubyte v4, v[4:5]
; GFX7-ALIGNED-NEXT:    flat_load_ubyte v2, v[2:3]
; GFX7-ALIGNED-NEXT:    flat_load_ubyte v0, v[0:1]
; GFX7-ALIGNED-NEXT:    s_waitcnt vmcnt(3)
; GFX7-ALIGNED-NEXT:    v_lshlrev_b32_e32 v3, 24, v6
; GFX7-ALIGNED-NEXT:    s_waitcnt vmcnt(2)
; GFX7-ALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 8, v4
; GFX7-ALIGNED-NEXT:    s_waitcnt vmcnt(1)
; GFX7-ALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX7-ALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-ALIGNED-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-ALIGNED-NEXT:    v_or_b32_e32 v1, v3, v2
; GFX7-ALIGNED-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX7-ALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-UNALIGNED-LABEL: global_load_2xi16_align1:
; GFX7-UNALIGNED:       ; %bb.0:
; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-UNALIGNED-NEXT:    flat_load_dword v0, v[0:1]
; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-UNALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: global_load_2xi16_align1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_load_2xi16_align1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_load_2xi16_align1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_load_2xi16_align1:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_load_b32 v0, v[0:1], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %gep.p = getelementptr i16, ptr addrspace(1) %p, i64 1
  %p.0 = load i16, ptr addrspace(1) %p, align 1
  %p.1 = load i16, ptr addrspace(1) %gep.p, align 1
  %zext.0 = zext i16 %p.0 to i32
  %zext.1 = zext i16 %p.1 to i32
  %shl.1 = shl i32 %zext.1, 16
  %or = or i32 %zext.0, %shl.1
  ret i32 %or
}

; Two adjacent i16 stores (constants 1 and 2) that are only 1-byte aligned.
; Should produce an align 1 dword store when legal: with unaligned access mode
; disabled the stores are split into four byte stores, with it enabled they
; become a single dword store of 0x20001.
; The stores go through %r; %p is unused.
define amdgpu_kernel void @global_store_2xi16_align1(ptr addrspace(1) %p, ptr addrspace(1) %r) #0 {
; GFX7-ALIGNED-LABEL: global_store_2xi16_align1:
; GFX7-ALIGNED:       ; %bb.0:
; GFX7-ALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x2
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v2, 1
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v3, 0
; GFX7-ALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-ALIGNED-NEXT:    s_add_u32 s2, s0, 2
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-ALIGNED-NEXT:    s_addc_u32 s3, s1, 0
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-ALIGNED-NEXT:    s_add_u32 s4, s0, 1
; GFX7-ALIGNED-NEXT:    flat_store_byte v[0:1], v2
; GFX7-ALIGNED-NEXT:    s_addc_u32 s5, s1, 0
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-ALIGNED-NEXT:    s_add_u32 s0, s0, 3
; GFX7-ALIGNED-NEXT:    flat_store_byte v[0:1], v3
; GFX7-ALIGNED-NEXT:    s_addc_u32 s1, s1, 0
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-ALIGNED-NEXT:    flat_store_byte v[0:1], v3
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v0, s2
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v2, 2
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v1, s3
; GFX7-ALIGNED-NEXT:    flat_store_byte v[0:1], v2
; GFX7-ALIGNED-NEXT:    s_endpgm
;
; GFX7-UNALIGNED-LABEL: global_store_2xi16_align1:
; GFX7-UNALIGNED:       ; %bb.0:
; GFX7-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x2
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v2, 0x20001
; GFX7-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-UNALIGNED-NEXT:    flat_store_dword v[0:1], v2
; GFX7-UNALIGNED-NEXT:    s_endpgm
;
; GFX9-LABEL: global_store_2xi16_align1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x20001
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_store_2xi16_align1:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x20001
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_2xi16_align1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x20001
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: global_store_2xi16_align1:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x20001
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
  %gep.r = getelementptr i16, ptr addrspace(1) %r, i64 1
  store i16 1, ptr addrspace(1) %r, align 1
  store i16 2, ptr addrspace(1) %gep.r, align 1
  ret void
}

; Two adjacent i16 loads where the first is 4-byte aligned.
; Should merge this to a dword load in every configuration, including the one
; with unaligned access mode disabled.
define i32 @global_load_2xi16_align4(ptr addrspace(1) %p) #0 {
; GFX7-ALIGNED-LABEL: global_load_2xi16_align4:
; GFX7-ALIGNED:       ; %bb.0:
; GFX7-ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-ALIGNED-NEXT:    flat_load_dword v0, v[0:1]
; GFX7-ALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-ALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-UNALIGNED-LABEL: global_load_2xi16_align4:
; GFX7-UNALIGNED:       ; %bb.0:
; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-UNALIGNED-NEXT:    flat_load_dword v0, v[0:1]
; GFX7-UNALIGNED-NEXT:    s_waitcnt vmcnt(0)
; GFX7-UNALIGNED-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: global_load_2xi16_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: global_load_2xi16_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: global_load_2xi16_align4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: global_load_2xi16_align4:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_load_b32 v0, v[0:1], off
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %gep.p = getelementptr i16, ptr addrspace(1) %p, i64 1
  %p.0 = load i16, ptr addrspace(1) %p, align 4
  %p.1 = load i16, ptr addrspace(1) %gep.p, align 2
  %zext.0 = zext i16 %p.0 to i32
  %zext.1 = zext i16 %p.1 to i32
  %shl.1 = shl i32 %zext.1, 16
  %or = or i32 %zext.0, %shl.1
  ret i32 %or
}

; Two adjacent i16 stores (constants 1 and 2) where the first is 4-byte
; aligned. Should merge this to a dword store of 0x20001 in every
; configuration, including the one with unaligned access mode disabled.
; The stores go through %r; %p is unused.
define amdgpu_kernel void @global_store_2xi16_align4(ptr addrspace(1) %p, ptr addrspace(1) %r) #0 {
; GFX7-ALIGNED-LABEL: global_store_2xi16_align4:
; GFX7-ALIGNED:       ; %bb.0:
; GFX7-ALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x2
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v2, 0x20001
; GFX7-ALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-ALIGNED-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-ALIGNED-NEXT:    flat_store_dword v[0:1], v2
; GFX7-ALIGNED-NEXT:    s_endpgm
;
; GFX7-UNALIGNED-LABEL: global_store_2xi16_align4:
; GFX7-UNALIGNED:       ; %bb.0:
; GFX7-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x2
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v2, 0x20001
; GFX7-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v0, s0
; GFX7-UNALIGNED-NEXT:    v_mov_b32_e32 v1, s1
; GFX7-UNALIGNED-NEXT:    flat_store_dword v[0:1], v2
; GFX7-UNALIGNED-NEXT:    s_endpgm
;
; GFX9-LABEL: global_store_2xi16_align4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x20001
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9-NEXT:    s_endpgm
;
; GFX10-LABEL: global_store_2xi16_align4:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x20001
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: global_store_2xi16_align4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x20001
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: global_store_2xi16_align4:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x20001
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-NEXT:    s_endpgm
  %gep.r = getelementptr i16, ptr addrspace(1) %r, i64 1
  store i16 1, ptr addrspace(1) %r, align 4
  store i16 2, ptr addrspace(1) %gep.r, align 2
  ret void
}