; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s

; Every function in this file loads a <3 x i32> value through an
; addrspace(3) pointer and returns it unchanged. The functions differ only in
; the alignment written on the load, so the expected instruction sequences
; show how each of the five targets above splits (or does not split) the
; 12-byte access.

; Load with no explicit alignment. The expected output for every target is
; the same as for the "align 16" variant at the end of the file.
define <3 x i32> @load_lds_v3i32(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_read_b96 v[0:2], v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: ds_read_b96 v[0:2], v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, v0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 8, v2
; GFX6-NEXT: ds_read_b64 v[0:1], v0
; GFX6-NEXT: ds_read_b32 v2, v2
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: ds_read_b96 v[0:2], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_load_b96 v[0:2], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vec = load <3 x i32>, ptr addrspace(3) %ptr
  ret <3 x i32> %vec
}

; Load with "align 1". All targets are expected to issue twelve single-byte
; reads and reassemble the three dwords with shift/or sequences.
define <3 x i32> @load_lds_v3i32_align1(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32_align1:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_read_u8 v1, v0
; GFX9-NEXT: ds_read_u8 v2, v0 offset:1
; GFX9-NEXT: ds_read_u8 v3, v0 offset:2
; GFX9-NEXT: ds_read_u8 v4, v0 offset:3
; GFX9-NEXT: ds_read_u8 v5, v0 offset:4
; GFX9-NEXT: ds_read_u8 v6, v0 offset:5
; GFX9-NEXT: ds_read_u8 v7, v0 offset:6
; GFX9-NEXT: ds_read_u8 v8, v0 offset:7
; GFX9-NEXT: ds_read_u8 v9, v0 offset:8
; GFX9-NEXT: ds_read_u8 v10, v0 offset:9
; GFX9-NEXT: ds_read_u8 v11, v0 offset:10
; GFX9-NEXT: ds_read_u8 v12, v0 offset:11
; GFX9-NEXT: s_waitcnt lgkmcnt(10)
; GFX9-NEXT: v_lshl_or_b32 v0, v2, 8, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(8)
; GFX9-NEXT: v_lshl_or_b32 v1, v4, 8, v3
; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(6)
; GFX9-NEXT: v_lshl_or_b32 v1, v6, 8, v5
; GFX9-NEXT: s_waitcnt lgkmcnt(4)
; GFX9-NEXT: v_lshl_or_b32 v2, v8, 8, v7
; GFX9-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(2)
; GFX9-NEXT: v_lshl_or_b32 v2, v10, 8, v9
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_lshl_or_b32 v3, v12, 8, v11
; GFX9-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: ds_read_u8 v1, v0 offset:6
; GFX7-NEXT: ds_read_u8 v2, v0 offset:4
; GFX7-NEXT: ds_read_u8 v3, v0 offset:2
; GFX7-NEXT: ds_read_u8 v4, v0 offset:1
; GFX7-NEXT: ds_read_u8 v5, v0
; GFX7-NEXT: ds_read_u8 v6, v0 offset:3
; GFX7-NEXT: ds_read_u8 v7, v0 offset:5
; GFX7-NEXT: ds_read_u8 v8, v0 offset:7
; GFX7-NEXT: s_waitcnt lgkmcnt(4)
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v4
; GFX7-NEXT: s_waitcnt lgkmcnt(3)
; GFX7-NEXT: v_or_b32_e32 v4, v4, v5
; GFX7-NEXT: s_waitcnt lgkmcnt(2)
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 8, v6
; GFX7-NEXT: v_or_b32_e32 v3, v5, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_or_b32_e32 v3, v3, v4
; GFX7-NEXT: s_waitcnt lgkmcnt(1)
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v7
; GFX7-NEXT: ds_read_u8 v5, v0 offset:11
; GFX7-NEXT: ds_read_u8 v6, v0 offset:10
; GFX7-NEXT: ds_read_u8 v7, v0 offset:9
; GFX7-NEXT: ds_read_u8 v0, v0 offset:8
; GFX7-NEXT: v_or_b32_e32 v2, v4, v2
; GFX7-NEXT: s_waitcnt lgkmcnt(4)
; GFX7-NEXT: v_lshlrev_b32_e32 v4, 8, v8
; GFX7-NEXT: v_or_b32_e32 v1, v4, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v1, v1, v2
; GFX7-NEXT: s_waitcnt lgkmcnt(1)
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v7
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_or_b32_e32 v0, v2, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 8, v5
; GFX7-NEXT: v_or_b32_e32 v2, v2, v6
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX7-NEXT: v_or_b32_e32 v2, v2, v0
; GFX7-NEXT: v_mov_b32_e32 v0, v3
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align1:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_add_i32_e32 v1, vcc, 5, v0
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 4, v0
; GFX6-NEXT: v_add_i32_e32 v3, vcc, 7, v0
; GFX6-NEXT: v_add_i32_e32 v4, vcc, 6, v0
; GFX6-NEXT: v_add_i32_e32 v5, vcc, 9, v0
; GFX6-NEXT: v_add_i32_e32 v6, vcc, 8, v0
; GFX6-NEXT: v_add_i32_e32 v7, vcc, 11, v0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: ds_read_u8 v1, v1
; GFX6-NEXT: ds_read_u8 v2, v2
; GFX6-NEXT: ds_read_u8 v3, v3
; GFX6-NEXT: ds_read_u8 v4, v4
; GFX6-NEXT: ds_read_u8 v5, v5
; GFX6-NEXT: ds_read_u8 v6, v6
; GFX6-NEXT: ds_read_u8 v7, v7
; GFX6-NEXT: ds_read_u8 v8, v0
; GFX6-NEXT: s_waitcnt lgkmcnt(7)
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
; GFX6-NEXT: s_waitcnt lgkmcnt(6)
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: s_waitcnt lgkmcnt(5)
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v3
; GFX6-NEXT: s_waitcnt lgkmcnt(4)
; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
; GFX6-NEXT: v_add_i32_e32 v4, vcc, 10, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: ds_read_u8 v4, v4
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: s_waitcnt lgkmcnt(4)
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v5
; GFX6-NEXT: s_waitcnt lgkmcnt(3)
; GFX6-NEXT: v_or_b32_e32 v2, v2, v6
; GFX6-NEXT: v_add_i32_e32 v5, vcc, 3, v0
; GFX6-NEXT: v_add_i32_e32 v6, vcc, 2, v0
; GFX6-NEXT: v_add_i32_e32 v0, vcc, 1, v0
; GFX6-NEXT: ds_read_u8 v5, v5
; GFX6-NEXT: ds_read_u8 v6, v6
; GFX6-NEXT: ds_read_u8 v0, v0
; GFX6-NEXT: s_waitcnt lgkmcnt(5)
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v7
; GFX6-NEXT: s_waitcnt lgkmcnt(3)
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v2, v3, v2
; GFX6-NEXT: s_waitcnt lgkmcnt(2)
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v5
; GFX6-NEXT: s_waitcnt lgkmcnt(1)
; GFX6-NEXT: v_or_b32_e32 v3, v3, v6
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_or_b32_e32 v0, v0, v8
; GFX6-NEXT: v_or_b32_e32 v0, v3, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: ds_read_u8 v1, v0
; GFX10-NEXT: ds_read_u8 v2, v0 offset:1
; GFX10-NEXT: ds_read_u8 v3, v0 offset:2
; GFX10-NEXT: ds_read_u8 v4, v0 offset:3
; GFX10-NEXT: ds_read_u8 v5, v0 offset:4
; GFX10-NEXT: ds_read_u8 v6, v0 offset:5
; GFX10-NEXT: ds_read_u8 v7, v0 offset:6
; GFX10-NEXT: ds_read_u8 v8, v0 offset:7
; GFX10-NEXT: ds_read_u8 v9, v0 offset:8
; GFX10-NEXT: ds_read_u8 v10, v0 offset:9
; GFX10-NEXT: ds_read_u8 v11, v0 offset:10
; GFX10-NEXT: ds_read_u8 v0, v0 offset:11
; GFX10-NEXT: s_waitcnt lgkmcnt(10)
; GFX10-NEXT: v_lshl_or_b32 v1, v2, 8, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(8)
; GFX10-NEXT: v_lshl_or_b32 v2, v4, 8, v3
; GFX10-NEXT: s_waitcnt lgkmcnt(6)
; GFX10-NEXT: v_lshl_or_b32 v3, v6, 8, v5
; GFX10-NEXT: s_waitcnt lgkmcnt(4)
; GFX10-NEXT: v_lshl_or_b32 v4, v8, 8, v7
; GFX10-NEXT: s_waitcnt lgkmcnt(2)
; GFX10-NEXT: v_lshl_or_b32 v5, v10, 8, v9
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_lshl_or_b32 v6, v0, 8, v11
; GFX10-NEXT: v_lshl_or_b32 v0, v2, 16, v1
; GFX10-NEXT: v_lshl_or_b32 v1, v4, 16, v3
; GFX10-NEXT: v_lshl_or_b32 v2, v6, 16, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32_align1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_load_u8 v1, v0
; GFX11-NEXT: ds_load_u8 v2, v0 offset:1
; GFX11-NEXT: ds_load_u8 v3, v0 offset:2
; GFX11-NEXT: ds_load_u8 v4, v0 offset:3
; GFX11-NEXT: ds_load_u8 v5, v0 offset:4
; GFX11-NEXT: ds_load_u8 v6, v0 offset:5
; GFX11-NEXT: ds_load_u8 v7, v0 offset:6
; GFX11-NEXT: ds_load_u8 v8, v0 offset:7
; GFX11-NEXT: ds_load_u8 v9, v0 offset:8
; GFX11-NEXT: ds_load_u8 v10, v0 offset:9
; GFX11-NEXT: ds_load_u8 v11, v0 offset:10
; GFX11-NEXT: ds_load_u8 v0, v0 offset:11
; GFX11-NEXT: s_waitcnt lgkmcnt(10)
; GFX11-NEXT: v_lshl_or_b32 v1, v2, 8, v1
; GFX11-NEXT: s_waitcnt lgkmcnt(8)
; GFX11-NEXT: v_lshl_or_b32 v2, v4, 8, v3
; GFX11-NEXT: s_waitcnt lgkmcnt(6)
; GFX11-NEXT: v_lshl_or_b32 v3, v6, 8, v5
; GFX11-NEXT: s_waitcnt lgkmcnt(4)
; GFX11-NEXT: v_lshl_or_b32 v4, v8, 8, v7
; GFX11-NEXT: s_waitcnt lgkmcnt(2)
; GFX11-NEXT: v_lshl_or_b32 v5, v10, 8, v9
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_lshl_or_b32 v6, v0, 8, v11
; GFX11-NEXT: v_lshl_or_b32 v0, v2, 16, v1
; GFX11-NEXT: v_lshl_or_b32 v1, v4, 16, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_lshl_or_b32 v2, v6, 16, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vec = load <3 x i32>, ptr addrspace(3) %ptr, align 1
  ret <3 x i32> %vec
}

; Load with "align 2". All targets are expected to issue six 16-bit reads and
; combine each pair into one dword.
define <3 x i32> @load_lds_v3i32_align2(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32_align2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_read_u16 v1, v0
; GFX9-NEXT: ds_read_u16 v2, v0 offset:2
; GFX9-NEXT: ds_read_u16 v3, v0 offset:4
; GFX9-NEXT: ds_read_u16 v4, v0 offset:6
; GFX9-NEXT: ds_read_u16 v5, v0 offset:8
; GFX9-NEXT: ds_read_u16 v6, v0 offset:10
; GFX9-NEXT: s_waitcnt lgkmcnt(4)
; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(2)
; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v3
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_lshl_or_b32 v2, v6, 16, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: ds_read_u16 v2, v0 offset:8
; GFX7-NEXT: ds_read_u16 v1, v0 offset:4
; GFX7-NEXT: ds_read_u16 v3, v0 offset:2
; GFX7-NEXT: ds_read_u16 v4, v0
; GFX7-NEXT: ds_read_u16 v5, v0 offset:6
; GFX7-NEXT: ds_read_u16 v6, v0 offset:10
; GFX7-NEXT: s_waitcnt lgkmcnt(3)
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v3
; GFX7-NEXT: s_waitcnt lgkmcnt(2)
; GFX7-NEXT: v_or_b32_e32 v0, v0, v4
; GFX7-NEXT: s_waitcnt lgkmcnt(1)
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v5
; GFX7-NEXT: v_or_b32_e32 v1, v3, v1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v6
; GFX7-NEXT: v_or_b32_e32 v2, v3, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align2:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_add_i32_e32 v1, vcc, 6, v0
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 4, v0
; GFX6-NEXT: v_add_i32_e32 v3, vcc, 10, v0
; GFX6-NEXT: v_add_i32_e32 v4, vcc, 8, v0
; GFX6-NEXT: v_add_i32_e32 v5, vcc, 2, v0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: ds_read_u16 v1, v1
; GFX6-NEXT: ds_read_u16 v2, v2
; GFX6-NEXT: ds_read_u16 v3, v3
; GFX6-NEXT: ds_read_u16 v4, v4
; GFX6-NEXT: ds_read_u16 v5, v5
; GFX6-NEXT: ds_read_u16 v0, v0
; GFX6-NEXT: s_waitcnt lgkmcnt(5)
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: s_waitcnt lgkmcnt(4)
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: s_waitcnt lgkmcnt(3)
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v3
; GFX6-NEXT: s_waitcnt lgkmcnt(1)
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v5
; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: v_or_b32_e32 v0, v3, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: ds_read_u16 v1, v0
; GFX10-NEXT: ds_read_u16 v2, v0 offset:2
; GFX10-NEXT: ds_read_u16 v3, v0 offset:4
; GFX10-NEXT: ds_read_u16 v4, v0 offset:6
; GFX10-NEXT: ds_read_u16 v5, v0 offset:8
; GFX10-NEXT: ds_read_u16 v6, v0 offset:10
; GFX10-NEXT: s_waitcnt lgkmcnt(4)
; GFX10-NEXT: v_lshl_or_b32 v0, v2, 16, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(2)
; GFX10-NEXT: v_lshl_or_b32 v1, v4, 16, v3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_lshl_or_b32 v2, v6, 16, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32_align2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_load_u16 v1, v0
; GFX11-NEXT: ds_load_u16 v2, v0 offset:2
; GFX11-NEXT: ds_load_u16 v3, v0 offset:4
; GFX11-NEXT: ds_load_u16 v4, v0 offset:6
; GFX11-NEXT: ds_load_u16 v5, v0 offset:8
; GFX11-NEXT: ds_load_u16 v6, v0 offset:10
; GFX11-NEXT: s_waitcnt lgkmcnt(4)
; GFX11-NEXT: v_lshl_or_b32 v0, v2, 16, v1
; GFX11-NEXT: s_waitcnt lgkmcnt(2)
; GFX11-NEXT: v_lshl_or_b32 v1, v4, 16, v3
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_lshl_or_b32 v2, v6, 16, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vec = load <3 x i32>, ptr addrspace(3) %ptr, align 2
  ret <3 x i32> %vec
}

; Load with "align 4". The tahiti expectations use three separate 32-bit
; reads; the other targets pair a two-dword read with one 32-bit read.
define <3 x i32> @load_lds_v3i32_align4(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX9-NEXT: ds_read_b32 v2, v2 offset:8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align4:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v2, v0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX7-NEXT: ds_read_b32 v2, v2 offset:8
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align4:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_add_i32_e32 v1, vcc, 4, v0
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 8, v0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: ds_read_b32 v2, v2
; GFX6-NEXT: ds_read_b32 v0, v0
; GFX6-NEXT: ds_read_b32 v1, v1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: ds_read2_b32 v[0:1], v0 offset1:1
; GFX10-NEXT: ds_read_b32 v2, v2 offset:8
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32_align4:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: ds_load_2addr_b32 v[0:1], v0 offset1:1
; GFX11-NEXT: ds_load_b32 v2, v2 offset:8
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vec = load <3 x i32>, ptr addrspace(3) %ptr, align 4
  ret <3 x i32> %vec
}

; Load with "align 8". Every target is expected to use one 64-bit read for
; the first two elements followed by one 32-bit read for the third.
define <3 x i32> @load_lds_v3i32_align8(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32_align8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v2, v0
; GFX9-NEXT: ds_read_b64 v[0:1], v0
; GFX9-NEXT: ds_read_b32 v2, v2 offset:8
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align8:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v2, v0
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: ds_read_b64 v[0:1], v0
; GFX7-NEXT: ds_read_b32 v2, v2 offset:8
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, v0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 8, v2
; GFX6-NEXT: ds_read_b64 v[0:1], v0
; GFX6-NEXT: ds_read_b32 v2, v2
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v2, v0
; GFX10-NEXT: ds_read_b64 v[0:1], v0
; GFX10-NEXT: ds_read_b32 v2, v2 offset:8
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32_align8:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v2, v0
; GFX11-NEXT: ds_load_b64 v[0:1], v0
; GFX11-NEXT: ds_load_b32 v2, v2 offset:8
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vec = load <3 x i32>, ptr addrspace(3) %ptr, align 8
  ret <3 x i32> %vec
}

; Load with "align 16". All targets except tahiti are expected to use a
; single 96-bit read; tahiti still splits into a 64-bit and a 32-bit read.
define <3 x i32> @load_lds_v3i32_align16(ptr addrspace(3) %ptr) {
; GFX9-LABEL: load_lds_v3i32_align16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_read_b96 v[0:2], v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: load_lds_v3i32_align16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: ds_read_b96 v[0:2], v0
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX6-LABEL: load_lds_v3i32_align16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_mov_b32_e32 v2, v0
; GFX6-NEXT: s_mov_b32 m0, -1
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 8, v2
; GFX6-NEXT: ds_read_b64 v[0:1], v0
; GFX6-NEXT: ds_read_b32 v2, v2
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: load_lds_v3i32_align16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: ds_read_b96 v[0:2], v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: load_lds_v3i32_align16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_load_b96 v[0:2], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %vec = load <3 x i32>, ptr addrspace(3) %ptr, align 16
  ret <3 x i32> %vec
}