; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,CI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,GFX89,GFX8 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,GFX89,GFX9 %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s

; Each function in this file returns a single value of the type named in its
; symbol. The checks record, per subtarget, the instructions that produce the
; value and the VGPRs it ends up in before the s_setpc_b64 return.

; Plain i1 return: one unsigned byte load into v0, no extension instruction.
define i1 @i1_func_void() #0 {
; GFX789-LABEL: i1_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i1, ptr addrspace(1) undef
  ret i1 %val
}

; FIXME: Missing and? The zeroext i1 return that follows is produced by the
; byte load alone; no instruction masking the result down to bit 0 is emitted.
; zeroext i1: only the unsigned byte load appears in the checks; there is no
; separate masking instruction.
define zeroext i1 @i1_zeroext_func_void() #0 {
; GFX789-LABEL: i1_zeroext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_zeroext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i1, ptr addrspace(1) undef
  ret i1 %val
}

; signext i1: the loaded byte is sign-extended from bit 0 with a 1-bit
; v_bfe_i32 before returning.
define signext i1 @i1_signext_func_void() #0 {
; GFX789-LABEL: i1_signext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    v_bfe_i32 v0, v0, 0, 1
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_signext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_bfe_i32 v0, v0, 0, 1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i1, ptr addrspace(1) undef
  ret i1 %val
}

; i8 returns: the plain and zeroext variants use the unsigned byte load, the
; signext variant uses the signed byte load.
define i8 @i8_func_void() #0 {
; GFX789-LABEL: i8_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i8_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i8, ptr addrspace(1) undef
  ret i8 %val
}

define zeroext i8 @i8_zeroext_func_void() #0 {
; GFX789-LABEL: i8_zeroext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i8_zeroext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i8, ptr addrspace(1) undef
  ret i8 %val
}

define signext i8 @i8_signext_func_void() #0 {
; GFX789-LABEL: i8_signext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_sbyte v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i8_signext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_i8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i8, ptr addrspace(1) undef
  ret i8 %val
}

; i16 returns: same pattern as i8, using the 16-bit unsigned/signed loads.
define i16 @i16_func_void() #0 {
; GFX789-LABEL: i16_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i16, ptr addrspace(1) undef
  ret i16 %val
}

define zeroext i16 @i16_zeroext_func_void() #0 {
; GFX789-LABEL: i16_zeroext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i16_zeroext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i16, ptr addrspace(1) undef
  ret i16 %val
}

define signext i16 @i16_signext_func_void() #0 {
; GFX789-LABEL: i16_signext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_sshort v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i16_signext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_i16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i16, ptr addrspace(1) undef
  ret i16 %val
}

define i32 @i32_func_void() #0 {
; GFX789-LABEL: i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i32, ptr addrspace(1) undef
  ret i32 %val
}

; i48 returns occupy two VGPRs: a dword load into v0 and a 16-bit load into v1.
; Only the signext variant switches the 16-bit load to its signed form.
define i48 @i48_func_void() #0 {
; GFX789-LABEL: i48_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT:    buffer_load_ushort v1, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i48_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    buffer_load_u16 v1, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i48, ptr addrspace(1) undef, align 8
  ret i48 %val
}

define zeroext i48 @i48_zeroext_func_void() #0 {
; GFX789-LABEL: i48_zeroext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT:    buffer_load_ushort v1, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i48_zeroext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    buffer_load_u16 v1, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i48, ptr addrspace(1) undef, align 8
  ret i48 %val
}

define signext i48 @i48_signext_func_void() #0 {
; GFX789-LABEL: i48_signext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT:    buffer_load_sshort v1, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i48_signext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    buffer_load_i16 v1, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i48, ptr addrspace(1) undef, align 8
  ret i48 %val
}

; i63 passed straight from argument to return value: the checks contain only
; the entry wait and the return.
define i63 @i63_func_void(i63 %val) #0 {
; GFX789-LABEL: i63_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i63_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ret i63 %val
}
; zeroext i63: the top bit of the high dword (v1) is cleared with a mask.
define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
; GFX789-LABEL: i63_zeroext_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i63_zeroext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ret i63 %val
}

; signext i63: sign extension is a 64-bit shift left by 1 followed by an
; arithmetic shift right by 1; the instruction spelling differs per subtarget.
define signext i63 @i63_signext_func_void(i63 %val) #0 {
; CI-LABEL: i63_signext_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
; CI-NEXT:    v_ashr_i64 v[0:1], v[0:1], 1
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: i63_signext_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX89-NEXT:    v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i63_signext_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_ashrrev_i64 v[0:1], 1, v[0:1]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ret i63 %val
}

define i64 @i64_func_void() #0 {
; GFX789-LABEL: i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i64, ptr addrspace(1) undef
  ret i64 %val
}

; i65 needs a third VGPR: a 64-bit load into v[0:1] plus a byte load into v2.
define i65 @i65_func_void() #0 {
; GFX789-LABEL: i65_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_ubyte v2, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i65_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_u8 v2, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load i65, ptr addrspace(1) undef
  ret i65 %val
}

define float @f32_func_void() #0 {
; GFX789-LABEL: f32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: f32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load float, ptr addrspace(1) undef
  ret float %val
}

define double @f64_func_void() #0 {
; GFX789-LABEL: f64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: f64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load double, ptr addrspace(1) undef
  ret double %val
}

define <2 x double> @v2f64_func_void() #0 {
; GFX789-LABEL: v2f64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2f64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <2 x double>, ptr addrspace(1) undef
  ret <2 x double> %val
}

define <2 x i32> @v2i32_func_void() #0 {
; GFX789-LABEL: v2i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <2 x i32>, ptr addrspace(1) undef
  ret <2 x i32> %val
}

define <3 x i32> @v3i32_func_void() #0 {
; GFX789-LABEL: v3i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx3 v[0:2], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b96 v[0:2], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <3 x i32>, ptr addrspace(1) undef
  ret <3 x i32> %val
}

define <4 x i32> @v4i32_func_void() #0 {
; GFX789-LABEL: v4i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <4 x i32>, ptr addrspace(1) undef
  ret <4 x i32> %val
}

; The <5 x i32> load is volatile: the checks show two separate loads, each
; followed by its own wait.
define <5 x i32> @v5i32_func_void() #0 {
; GFX789-LABEL: v5i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dword v4, off, s[4:7], 0 glc
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0 glc
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v5i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b32 v4, off, s[0:3], 0 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load volatile <5 x i32>, ptr addrspace(1) undef
  ret <5 x i32> %val
}

; From here on the wider vectors first load the source pointer with a scalar
; load, then read the value in 128-bit (or smaller tail) pieces at increasing
; offsets.
define <8 x i32> @v8i32_func_void() #0 {
; GFX789-LABEL: v8i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v8i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <8 x i32>, ptr addrspace(1) %ptr
  ret <8 x i32> %val
}

define <16 x i32> @v16i32_func_void() #0 {
; GFX789-LABEL: v16i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <16 x i32>, ptr addrspace(1) %ptr
  ret <16 x i32> %val
}

define <32 x i32> @v32i32_func_void() #0 {
; GFX789-LABEL: v32i32_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; GFX789-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; GFX789-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; GFX789-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v32i32_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x7
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT:    buffer_load_b128 v[16:19], off, s[0:3], 0 offset:64
; GFX11-NEXT:    buffer_load_b128 v[20:23], off, s[0:3], 0 offset:80
; GFX11-NEXT:    buffer_load_b128 v[24:27], off, s[0:3], 0 offset:96
; GFX11-NEXT:    buffer_load_b128 v[28:31], off, s[0:3], 0 offset:112
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <32 x i32>, ptr addrspace(1) %ptr
  ret <32 x i32> %val
}

define <2 x i64> @v2i64_func_void() #0 {
; GFX789-LABEL: v2i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <2 x i64>, ptr addrspace(1) undef
  ret <2 x i64> %val
}

define <3 x i64> @v3i64_func_void() #0 {
; GFX789-LABEL: v3i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx2 v[4:5], off, s[4:7], 0 offset:16
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b64 v[4:5], off, s[0:3], 0 offset:16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <3 x i64>, ptr addrspace(1) %ptr
  ret <3 x i64> %val
}

define <4 x i64> @v4i64_func_void() #0 {
; GFX789-LABEL: v4i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <4 x i64>, ptr addrspace(1) %ptr
  ret <4 x i64> %val
}

define <5 x i64> @v5i64_func_void() #0 {
; GFX789-LABEL: v5i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    buffer_load_dwordx2 v[8:9], off, s[4:7], 0 offset:32
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v5i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b64 v[8:9], off, s[0:3], 0 offset:32
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <5 x i64>, ptr addrspace(1) %ptr
  ret <5 x i64> %val
}

define <8 x i64> @v8i64_func_void() #0 {
; GFX789-LABEL: v8i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v8i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <8 x i64>, ptr addrspace(1) %ptr
  ret <8 x i64> %val
}

define <16 x i64> @v16i64_func_void() #0 {
; GFX789-LABEL: v16i64_func_void:
; GFX789:       ; %bb.0:
; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX789-NEXT:    s_mov_b32 s7, 0xf000
; GFX789-NEXT:    s_mov_b32 s6, -1
; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
; GFX789-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
; GFX789-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
; GFX789-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
; GFX789-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
; GFX789-NEXT:    s_waitcnt vmcnt(0)
; GFX789-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v16i64_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x7
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
; GFX11-NEXT:    buffer_load_b128 v[16:19], off, s[0:3], 0 offset:64
; GFX11-NEXT:    buffer_load_b128 v[20:23], off, s[0:3], 0 offset:80
; GFX11-NEXT:    buffer_load_b128 v[24:27], off, s[0:3], 0 offset:96
; GFX11-NEXT:    buffer_load_b128 v[28:31], off, s[0:3], 0 offset:112
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
  %val = load <16 x i64>, ptr addrspace(1) %ptr
  ret <16 x i64> %val
}

; 16-bit element vectors: the hawaii run unpacks the loaded dwords into one
; VGPR per element with shifts and moves, while the other runs return the
; loaded registers as they are.
define <2 x i16> @v2i16_func_void() #0 {
; CI-LABEL: v2i16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v2i16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dword v0, off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v2i16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <2 x i16>, ptr addrspace(1) undef
  ret <2 x i16> %val
}

define <3 x i16> @v3i16_func_void() #0 {
; CI-LABEL: v3i16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx2 v[2:3], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_alignbit_b32 v1, v3, v2, 16
; CI-NEXT:    v_mov_b32_e32 v0, v2
; CI-NEXT:    v_mov_b32_e32 v2, v3
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v3i16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <3 x i16>, ptr addrspace(1) undef
  ret <3 x i16> %val
}

define <4 x i16> @v4i16_func_void() #0 {
; CI-LABEL: v4i16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
; CI-NEXT:    v_mov_b32_e32 v2, v1
; CI-NEXT:    v_mov_b32_e32 v1, v4
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v4i16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4i16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <4 x i16>, ptr addrspace(1) undef
  ret <4 x i16> %val
}

define <4 x half> @v4f16_func_void() #0 {
; CI-LABEL: v4f16_func_void:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_load_dwordx2 v[3:4], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    v_cvt_f32_f16_e32 v0, v3
; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v4
; CI-NEXT:    v_cvt_f32_f16_e32 v2, v4
; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v4f16_func_void:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v4f16_func_void:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 
0x31016000 989; GFX11-NEXT: s_mov_b32 s2, -1 990; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 991; GFX11-NEXT: s_waitcnt vmcnt(0) 992; GFX11-NEXT: s_setpc_b64 s[30:31] 993 %val = load <4 x half>, ptr addrspace(1) undef 994 ret <4 x half> %val 995} 996 997; FIXME: Mixing buffer and global 998; FIXME: Should not scalarize 999define <5 x i16> @v5i16_func_void() #0 { 1000; CI-LABEL: v5i16_func_void: 1001; CI: ; %bb.0: 1002; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1003; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1004; CI-NEXT: s_mov_b32 s7, 0xf000 1005; CI-NEXT: s_mov_b32 s6, -1 1006; CI-NEXT: s_waitcnt lgkmcnt(0) 1007; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 1008; CI-NEXT: buffer_load_sshort v4, off, s[4:7], 0 offset:8 1009; CI-NEXT: s_waitcnt vmcnt(1) 1010; CI-NEXT: v_alignbit_b32 v5, v1, v0, 16 1011; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 1012; CI-NEXT: v_mov_b32_e32 v2, v1 1013; CI-NEXT: v_mov_b32_e32 v1, v5 1014; CI-NEXT: s_waitcnt vmcnt(0) 1015; CI-NEXT: s_setpc_b64 s[30:31] 1016; 1017; GFX89-LABEL: v5i16_func_void: 1018; GFX89: ; %bb.0: 1019; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1020; GFX89-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1021; GFX89-NEXT: s_mov_b32 s7, 0xf000 1022; GFX89-NEXT: s_mov_b32 s6, -1 1023; GFX89-NEXT: s_waitcnt lgkmcnt(0) 1024; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 1025; GFX89-NEXT: s_waitcnt vmcnt(0) 1026; GFX89-NEXT: s_setpc_b64 s[30:31] 1027; 1028; GFX11-LABEL: v5i16_func_void: 1029; GFX11: ; %bb.0: 1030; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1031; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 1032; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1033; GFX11-NEXT: s_mov_b32 s2, -1 1034; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1035; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 1036; GFX11-NEXT: s_waitcnt vmcnt(0) 1037; GFX11-NEXT: s_setpc_b64 s[30:31] 1038 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef 1039 %val = load <5 x i16>, ptr addrspace(1) %ptr 1040 ret <5 x 
i16> %val 1041} 1042 1043define <8 x i16> @v8i16_func_void() #0 { 1044; CI-LABEL: v8i16_func_void: 1045; CI: ; %bb.0: 1046; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1047; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1048; CI-NEXT: s_mov_b32 s7, 0xf000 1049; CI-NEXT: s_mov_b32 s6, -1 1050; CI-NEXT: s_waitcnt lgkmcnt(0) 1051; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 1052; CI-NEXT: s_waitcnt vmcnt(0) 1053; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v8 1054; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v9 1055; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v10 1056; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v11 1057; CI-NEXT: v_mov_b32_e32 v0, v8 1058; CI-NEXT: v_mov_b32_e32 v2, v9 1059; CI-NEXT: v_mov_b32_e32 v4, v10 1060; CI-NEXT: v_mov_b32_e32 v6, v11 1061; CI-NEXT: s_setpc_b64 s[30:31] 1062; 1063; GFX89-LABEL: v8i16_func_void: 1064; GFX89: ; %bb.0: 1065; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1066; GFX89-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1067; GFX89-NEXT: s_mov_b32 s7, 0xf000 1068; GFX89-NEXT: s_mov_b32 s6, -1 1069; GFX89-NEXT: s_waitcnt lgkmcnt(0) 1070; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 1071; GFX89-NEXT: s_waitcnt vmcnt(0) 1072; GFX89-NEXT: s_setpc_b64 s[30:31] 1073; 1074; GFX11-LABEL: v8i16_func_void: 1075; GFX11: ; %bb.0: 1076; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1077; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 1078; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1079; GFX11-NEXT: s_mov_b32 s2, -1 1080; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1081; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 1082; GFX11-NEXT: s_waitcnt vmcnt(0) 1083; GFX11-NEXT: s_setpc_b64 s[30:31] 1084 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef 1085 %val = load <8 x i16>, ptr addrspace(1) %ptr 1086 ret <8 x i16> %val 1087} 1088 1089define <16 x i16> @v16i16_func_void() #0 { 1090; CI-LABEL: v16i16_func_void: 1091; CI: ; %bb.0: 1092; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1093; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1094; CI-NEXT: 
s_mov_b32 s7, 0xf000 1095; CI-NEXT: s_mov_b32 s6, -1 1096; CI-NEXT: s_waitcnt lgkmcnt(0) 1097; CI-NEXT: buffer_load_dwordx4 v[22:25], off, s[4:7], 0 1098; CI-NEXT: buffer_load_dwordx4 v[18:21], off, s[4:7], 0 offset:16 1099; CI-NEXT: s_waitcnt vmcnt(1) 1100; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v22 1101; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v23 1102; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v24 1103; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v25 1104; CI-NEXT: s_waitcnt vmcnt(0) 1105; CI-NEXT: v_lshrrev_b32_e32 v9, 16, v18 1106; CI-NEXT: v_lshrrev_b32_e32 v11, 16, v19 1107; CI-NEXT: v_lshrrev_b32_e32 v13, 16, v20 1108; CI-NEXT: v_lshrrev_b32_e32 v15, 16, v21 1109; CI-NEXT: v_mov_b32_e32 v0, v22 1110; CI-NEXT: v_mov_b32_e32 v2, v23 1111; CI-NEXT: v_mov_b32_e32 v4, v24 1112; CI-NEXT: v_mov_b32_e32 v6, v25 1113; CI-NEXT: v_mov_b32_e32 v8, v18 1114; CI-NEXT: v_mov_b32_e32 v10, v19 1115; CI-NEXT: v_mov_b32_e32 v12, v20 1116; CI-NEXT: v_mov_b32_e32 v14, v21 1117; CI-NEXT: s_setpc_b64 s[30:31] 1118; 1119; GFX89-LABEL: v16i16_func_void: 1120; GFX89: ; %bb.0: 1121; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1122; GFX89-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1123; GFX89-NEXT: s_mov_b32 s7, 0xf000 1124; GFX89-NEXT: s_mov_b32 s6, -1 1125; GFX89-NEXT: s_waitcnt lgkmcnt(0) 1126; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 1127; GFX89-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 1128; GFX89-NEXT: s_waitcnt vmcnt(0) 1129; GFX89-NEXT: s_setpc_b64 s[30:31] 1130; 1131; GFX11-LABEL: v16i16_func_void: 1132; GFX11: ; %bb.0: 1133; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1134; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 1135; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1136; GFX11-NEXT: s_mov_b32 s2, -1 1137; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1138; GFX11-NEXT: s_clause 0x1 1139; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 1140; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16 1141; GFX11-NEXT: s_waitcnt vmcnt(0) 1142; GFX11-NEXT: s_setpc_b64 
s[30:31] 1143 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef 1144 %val = load <16 x i16>, ptr addrspace(1) %ptr 1145 ret <16 x i16> %val 1146} 1147 1148; FIXME: Should pack 1149define <16 x i8> @v16i8_func_void() #0 { 1150; GFX789-LABEL: v16i8_func_void: 1151; GFX789: ; %bb.0: 1152; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1153; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1154; GFX789-NEXT: s_mov_b32 s7, 0xf000 1155; GFX789-NEXT: s_mov_b32 s6, -1 1156; GFX789-NEXT: s_waitcnt lgkmcnt(0) 1157; GFX789-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 1158; GFX789-NEXT: s_waitcnt vmcnt(0) 1159; GFX789-NEXT: v_lshrrev_b32_e32 v16, 8, v0 1160; GFX789-NEXT: v_lshrrev_b32_e32 v17, 16, v0 1161; GFX789-NEXT: v_lshrrev_b32_e32 v18, 24, v0 1162; GFX789-NEXT: v_lshrrev_b32_e32 v5, 8, v1 1163; GFX789-NEXT: v_lshrrev_b32_e32 v6, 16, v1 1164; GFX789-NEXT: v_lshrrev_b32_e32 v7, 24, v1 1165; GFX789-NEXT: v_lshrrev_b32_e32 v9, 8, v2 1166; GFX789-NEXT: v_lshrrev_b32_e32 v10, 16, v2 1167; GFX789-NEXT: v_lshrrev_b32_e32 v11, 24, v2 1168; GFX789-NEXT: v_lshrrev_b32_e32 v13, 8, v3 1169; GFX789-NEXT: v_lshrrev_b32_e32 v14, 16, v3 1170; GFX789-NEXT: v_lshrrev_b32_e32 v15, 24, v3 1171; GFX789-NEXT: v_mov_b32_e32 v4, v1 1172; GFX789-NEXT: v_mov_b32_e32 v8, v2 1173; GFX789-NEXT: v_mov_b32_e32 v12, v3 1174; GFX789-NEXT: v_mov_b32_e32 v1, v16 1175; GFX789-NEXT: v_mov_b32_e32 v2, v17 1176; GFX789-NEXT: v_mov_b32_e32 v3, v18 1177; GFX789-NEXT: s_setpc_b64 s[30:31] 1178; 1179; GFX11-LABEL: v16i8_func_void: 1180; GFX11: ; %bb.0: 1181; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1182; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 1183; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1184; GFX11-NEXT: s_mov_b32 s2, -1 1185; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1186; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 1187; GFX11-NEXT: s_waitcnt vmcnt(0) 1188; GFX11-NEXT: v_lshrrev_b32_e32 v16, 8, v0 1189; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v0 1190; GFX11-NEXT: v_lshrrev_b32_e32 
v18, 24, v0 1191; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1 1192; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1 1193; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1 1194; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2 1195; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2 1196; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2 1197; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3 1198; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3 1199; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3 1200; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v16 1201; GFX11-NEXT: v_mov_b32_e32 v8, v2 1202; GFX11-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18 1203; GFX11-NEXT: v_mov_b32_e32 v2, v17 1204; GFX11-NEXT: s_setpc_b64 s[30:31] 1205 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef 1206 %val = load <16 x i8>, ptr addrspace(1) %ptr 1207 ret <16 x i8> %val 1208} 1209 1210; FIXME: Should pack 1211define <4 x i8> @v4i8_func_void() #0 { 1212; GFX789-LABEL: v4i8_func_void: 1213; GFX789: ; %bb.0: 1214; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1215; GFX789-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1216; GFX789-NEXT: s_mov_b32 s7, 0xf000 1217; GFX789-NEXT: s_mov_b32 s6, -1 1218; GFX789-NEXT: s_waitcnt lgkmcnt(0) 1219; GFX789-NEXT: buffer_load_dword v0, off, s[4:7], 0 1220; GFX789-NEXT: s_waitcnt vmcnt(0) 1221; GFX789-NEXT: v_lshrrev_b32_e32 v2, 16, v0 1222; GFX789-NEXT: v_lshrrev_b32_e32 v1, 8, v0 1223; GFX789-NEXT: v_lshrrev_b32_e32 v3, 24, v0 1224; GFX789-NEXT: s_setpc_b64 s[30:31] 1225; 1226; GFX11-LABEL: v4i8_func_void: 1227; GFX11: ; %bb.0: 1228; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1229; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 1230; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1231; GFX11-NEXT: s_mov_b32 s2, -1 1232; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1233; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 1234; GFX11-NEXT: s_waitcnt vmcnt(0) 1235; GFX11-NEXT: v_lshrrev_b32_e32 v1, 8, v0 1236; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 1237; GFX11-NEXT: v_lshrrev_b32_e32 v3, 24, v0 1238; 
GFX11-NEXT: s_setpc_b64 s[30:31] 1239 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef 1240 %val = load <4 x i8>, ptr addrspace(1) %ptr 1241 ret <4 x i8> %val 1242} 1243 1244define {i8, i32} @struct_i8_i32_func_void() #0 { 1245; GFX789-LABEL: struct_i8_i32_func_void: 1246; GFX789: ; %bb.0: 1247; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1248; GFX789-NEXT: s_mov_b32 s7, 0xf000 1249; GFX789-NEXT: s_mov_b32 s6, -1 1250; GFX789-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 1251; GFX789-NEXT: buffer_load_dword v1, off, s[4:7], 0 1252; GFX789-NEXT: s_waitcnt vmcnt(0) 1253; GFX789-NEXT: s_setpc_b64 s[30:31] 1254; 1255; GFX11-LABEL: struct_i8_i32_func_void: 1256; GFX11: ; %bb.0: 1257; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1258; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1259; GFX11-NEXT: s_mov_b32 s2, -1 1260; GFX11-NEXT: s_clause 0x1 1261; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 1262; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0 1263; GFX11-NEXT: s_waitcnt vmcnt(0) 1264; GFX11-NEXT: s_setpc_b64 s[30:31] 1265 %val = load { i8, i32 }, ptr addrspace(1) undef 1266 ret { i8, i32 } %val 1267} 1268 1269define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) #0 { 1270; GFX789-LABEL: void_func_sret_struct_i8_i32: 1271; GFX789: ; %bb.0: 1272; GFX789-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1273; GFX789-NEXT: s_mov_b32 s7, 0xf000 1274; GFX789-NEXT: s_mov_b32 s6, -1 1275; GFX789-NEXT: buffer_load_ubyte v1, off, s[4:7], 0 glc 1276; GFX789-NEXT: s_waitcnt vmcnt(0) 1277; GFX789-NEXT: buffer_load_dword v2, off, s[4:7], 0 glc 1278; GFX789-NEXT: s_waitcnt vmcnt(0) 1279; GFX789-NEXT: buffer_store_byte v1, v0, s[0:3], 0 offen 1280; GFX789-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4 1281; GFX789-NEXT: s_waitcnt vmcnt(0) 1282; GFX789-NEXT: s_setpc_b64 s[30:31] 1283; 1284; GFX11-LABEL: void_func_sret_struct_i8_i32: 1285; GFX11: ; %bb.0: 1286; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1287; GFX11-NEXT: 
s_mov_b32 s3, 0x31016000 1288; GFX11-NEXT: s_mov_b32 s2, -1 1289; GFX11-NEXT: buffer_load_u8 v1, off, s[0:3], 0 glc dlc 1290; GFX11-NEXT: s_waitcnt vmcnt(0) 1291; GFX11-NEXT: buffer_load_b32 v2, off, s[0:3], 0 glc dlc 1292; GFX11-NEXT: s_waitcnt vmcnt(0) 1293; GFX11-NEXT: s_clause 0x1 1294; GFX11-NEXT: scratch_store_b8 v0, v1, off 1295; GFX11-NEXT: scratch_store_b32 v0, v2, off offset:4 1296; GFX11-NEXT: s_setpc_b64 s[30:31] 1297 %val0 = load volatile i8, ptr addrspace(1) undef 1298 %val1 = load volatile i32, ptr addrspace(1) undef 1299 %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0 1300 %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1 1301 store i8 %val0, ptr addrspace(5) %gep0 1302 store i32 %val1, ptr addrspace(5) %gep1 1303 ret void 1304} 1305 1306; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call 1307; lowering introduces an extra CopyToReg/CopyFromReg obscuring the 1308; AssertZext inserted. Not using it introduces the spills. 
1309define <33 x i32> @v33i32_func_void() #0 { 1310; CI-LABEL: v33i32_func_void: 1311; CI: ; %bb.0: 1312; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1313; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1314; CI-NEXT: s_mov_b32 s7, 0xf000 1315; CI-NEXT: s_mov_b32 s6, -1 1316; CI-NEXT: v_add_i32_e32 v34, vcc, 0x80, v0 1317; CI-NEXT: s_waitcnt lgkmcnt(0) 1318; CI-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128 1319; CI-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 1320; CI-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 1321; CI-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 1322; CI-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 1323; CI-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 1324; CI-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 1325; CI-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16 1326; CI-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 1327; CI-NEXT: s_waitcnt vmcnt(8) 1328; CI-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen 1329; CI-NEXT: v_add_i32_e32 v33, vcc, 0x7c, v0 1330; CI-NEXT: s_waitcnt vmcnt(8) 1331; CI-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen 1332; CI-NEXT: v_add_i32_e32 v4, vcc, 0x78, v0 1333; CI-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen 1334; CI-NEXT: v_add_i32_e32 v3, vcc, 0x74, v0 1335; CI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen 1336; CI-NEXT: v_add_i32_e32 v2, vcc, 0x70, v0 1337; CI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen 1338; CI-NEXT: v_add_i32_e32 v1, vcc, 0x6c, v0 1339; CI-NEXT: v_add_i32_e32 v2, vcc, 0x68, v0 1340; CI-NEXT: v_add_i32_e32 v3, vcc, 0x64, v0 1341; CI-NEXT: s_waitcnt vmcnt(11) 1342; CI-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen 1343; CI-NEXT: v_add_i32_e32 v1, vcc, 0x60, v0 1344; CI-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen 1345; CI-NEXT: v_add_i32_e32 v2, vcc, 0x5c, v0 1346; CI-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen 1347; CI-NEXT: v_add_i32_e32 v3, vcc, 
0x58, v0 1348; CI-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen 1349; CI-NEXT: v_add_i32_e32 v1, vcc, 0x54, v0 1350; CI-NEXT: v_add_i32_e32 v4, vcc, 0x50, v0 1351; CI-NEXT: v_add_i32_e32 v5, vcc, 0x4c, v0 1352; CI-NEXT: s_waitcnt vmcnt(14) 1353; CI-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen 1354; CI-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen 1355; CI-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen 1356; CI-NEXT: v_add_i32_e32 v1, vcc, 56, v0 1357; CI-NEXT: v_add_i32_e32 v6, vcc, 0x48, v0 1358; CI-NEXT: v_add_i32_e32 v7, vcc, 0x44, v0 1359; CI-NEXT: v_add_i32_e32 v2, vcc, 64, v0 1360; CI-NEXT: v_add_i32_e32 v3, vcc, 60, v0 1361; CI-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen 1362; CI-NEXT: v_add_i32_e32 v4, vcc, 52, v0 1363; CI-NEXT: v_add_i32_e32 v8, vcc, 48, v0 1364; CI-NEXT: v_add_i32_e32 v9, vcc, 44, v0 1365; CI-NEXT: v_add_i32_e32 v10, vcc, 40, v0 1366; CI-NEXT: v_add_i32_e32 v11, vcc, 36, v0 1367; CI-NEXT: s_waitcnt vmcnt(14) 1368; CI-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen 1369; CI-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen 1370; CI-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen 1371; CI-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen 1372; CI-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen 1373; CI-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen 1374; CI-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen 1375; CI-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen 1376; CI-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen 1377; CI-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen 1378; CI-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen 1379; CI-NEXT: v_add_i32_e32 v1, vcc, 32, v0 1380; CI-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen 1381; CI-NEXT: v_add_i32_e32 v1, vcc, 28, v0 1382; CI-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen 1383; CI-NEXT: v_add_i32_e32 v1, vcc, 24, v0 1384; CI-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen 1385; CI-NEXT: v_add_i32_e32 v1, vcc, 20, v0 1386; CI-NEXT: 
buffer_store_dword v26, v1, s[0:3], 0 offen 1387; CI-NEXT: v_add_i32_e32 v1, vcc, 16, v0 1388; CI-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen 1389; CI-NEXT: v_add_i32_e32 v1, vcc, 12, v0 1390; CI-NEXT: s_waitcnt vmcnt(14) 1391; CI-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen 1392; CI-NEXT: v_add_i32_e32 v1, vcc, 8, v0 1393; CI-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen 1394; CI-NEXT: v_add_i32_e32 v1, vcc, 4, v0 1395; CI-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen 1396; CI-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen 1397; CI-NEXT: s_waitcnt vmcnt(0) 1398; CI-NEXT: s_setpc_b64 s[30:31] 1399; 1400; GFX8-LABEL: v33i32_func_void: 1401; GFX8: ; %bb.0: 1402; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1403; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1404; GFX8-NEXT: s_mov_b32 s7, 0xf000 1405; GFX8-NEXT: s_mov_b32 s6, -1 1406; GFX8-NEXT: v_add_u32_e32 v34, vcc, 0x80, v0 1407; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1408; GFX8-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128 1409; GFX8-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 1410; GFX8-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 1411; GFX8-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 1412; GFX8-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 1413; GFX8-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 1414; GFX8-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 1415; GFX8-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16 1416; GFX8-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 1417; GFX8-NEXT: s_waitcnt vmcnt(8) 1418; GFX8-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen 1419; GFX8-NEXT: v_add_u32_e32 v33, vcc, 0x7c, v0 1420; GFX8-NEXT: s_waitcnt vmcnt(8) 1421; GFX8-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen 1422; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x78, v0 1423; GFX8-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen 1424; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x74, v0 1425; 
GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen 1426; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x70, v0 1427; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen 1428; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x6c, v0 1429; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x68, v0 1430; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x64, v0 1431; GFX8-NEXT: s_waitcnt vmcnt(11) 1432; GFX8-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen 1433; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x60, v0 1434; GFX8-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen 1435; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x5c, v0 1436; GFX8-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen 1437; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x58, v0 1438; GFX8-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen 1439; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x54, v0 1440; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x50, v0 1441; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x4c, v0 1442; GFX8-NEXT: s_waitcnt vmcnt(14) 1443; GFX8-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen 1444; GFX8-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen 1445; GFX8-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen 1446; GFX8-NEXT: v_add_u32_e32 v1, vcc, 56, v0 1447; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x48, v0 1448; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x44, v0 1449; GFX8-NEXT: v_add_u32_e32 v2, vcc, 64, v0 1450; GFX8-NEXT: v_add_u32_e32 v3, vcc, 60, v0 1451; GFX8-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen 1452; GFX8-NEXT: v_add_u32_e32 v4, vcc, 52, v0 1453; GFX8-NEXT: v_add_u32_e32 v8, vcc, 48, v0 1454; GFX8-NEXT: v_add_u32_e32 v9, vcc, 44, v0 1455; GFX8-NEXT: v_add_u32_e32 v10, vcc, 40, v0 1456; GFX8-NEXT: v_add_u32_e32 v11, vcc, 36, v0 1457; GFX8-NEXT: s_waitcnt vmcnt(14) 1458; GFX8-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen 1459; GFX8-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen 1460; GFX8-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen 1461; GFX8-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen 1462; GFX8-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen 1463; GFX8-NEXT: 
buffer_store_dword v19, v1, s[0:3], 0 offen 1464; GFX8-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen 1465; GFX8-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen 1466; GFX8-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen 1467; GFX8-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen 1468; GFX8-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen 1469; GFX8-NEXT: v_add_u32_e32 v1, vcc, 32, v0 1470; GFX8-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen 1471; GFX8-NEXT: v_add_u32_e32 v1, vcc, 28, v0 1472; GFX8-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen 1473; GFX8-NEXT: v_add_u32_e32 v1, vcc, 24, v0 1474; GFX8-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen 1475; GFX8-NEXT: v_add_u32_e32 v1, vcc, 20, v0 1476; GFX8-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen 1477; GFX8-NEXT: v_add_u32_e32 v1, vcc, 16, v0 1478; GFX8-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen 1479; GFX8-NEXT: v_add_u32_e32 v1, vcc, 12, v0 1480; GFX8-NEXT: s_waitcnt vmcnt(14) 1481; GFX8-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen 1482; GFX8-NEXT: v_add_u32_e32 v1, vcc, 8, v0 1483; GFX8-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen 1484; GFX8-NEXT: v_add_u32_e32 v1, vcc, 4, v0 1485; GFX8-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen 1486; GFX8-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen 1487; GFX8-NEXT: s_waitcnt vmcnt(0) 1488; GFX8-NEXT: s_setpc_b64 s[30:31] 1489; 1490; GFX9-LABEL: v33i32_func_void: 1491; GFX9: ; %bb.0: 1492; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1493; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1494; GFX9-NEXT: s_mov_b32 s7, 0xf000 1495; GFX9-NEXT: s_mov_b32 s6, -1 1496; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1497; GFX9-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 1498; GFX9-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 1499; GFX9-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 1500; GFX9-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 1501; GFX9-NEXT: buffer_load_dword v33, off, 
s[4:7], 0 offset:128 1502; GFX9-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 1503; GFX9-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 1504; GFX9-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16 1505; GFX9-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 1506; GFX9-NEXT: s_waitcnt vmcnt(8) 1507; GFX9-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:124 1508; GFX9-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:120 1509; GFX9-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:116 1510; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:112 1511; GFX9-NEXT: s_waitcnt vmcnt(11) 1512; GFX9-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:108 1513; GFX9-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:104 1514; GFX9-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:100 1515; GFX9-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:96 1516; GFX9-NEXT: s_waitcnt vmcnt(14) 1517; GFX9-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:92 1518; GFX9-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:88 1519; GFX9-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:84 1520; GFX9-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:80 1521; GFX9-NEXT: s_waitcnt vmcnt(17) 1522; GFX9-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:76 1523; GFX9-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:72 1524; GFX9-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:68 1525; GFX9-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:64 1526; GFX9-NEXT: s_waitcnt vmcnt(20) 1527; GFX9-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen offset:128 1528; GFX9-NEXT: s_waitcnt vmcnt(20) 1529; GFX9-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:60 1530; GFX9-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen offset:56 1531; GFX9-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:52 1532; GFX9-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen 
offset:48 1533; GFX9-NEXT: s_waitcnt vmcnt(23) 1534; GFX9-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:44 1535; GFX9-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:40 1536; GFX9-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:36 1537; GFX9-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:32 1538; GFX9-NEXT: s_waitcnt vmcnt(26) 1539; GFX9-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:28 1540; GFX9-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:24 1541; GFX9-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:20 1542; GFX9-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:16 1543; GFX9-NEXT: s_waitcnt vmcnt(29) 1544; GFX9-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:12 1545; GFX9-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:8 1546; GFX9-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen offset:4 1547; GFX9-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen 1548; GFX9-NEXT: s_waitcnt vmcnt(0) 1549; GFX9-NEXT: s_setpc_b64 s[30:31] 1550; 1551; GFX11-LABEL: v33i32_func_void: 1552; GFX11: ; %bb.0: 1553; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1554; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 1555; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1556; GFX11-NEXT: s_mov_b32 s2, -1 1557; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1558; GFX11-NEXT: s_clause 0x8 1559; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0 offset:112 1560; GFX11-NEXT: buffer_load_b128 v[5:8], off, s[0:3], 0 offset:96 1561; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80 1562; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64 1563; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48 1564; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32 1565; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16 1566; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 1567; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128 1568; GFX11-NEXT: s_waitcnt 
vmcnt(8) 1569; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 1570; GFX11-NEXT: s_waitcnt vmcnt(7) 1571; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96 1572; GFX11-NEXT: s_waitcnt vmcnt(6) 1573; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 1574; GFX11-NEXT: s_waitcnt vmcnt(5) 1575; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64 1576; GFX11-NEXT: s_waitcnt vmcnt(4) 1577; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48 1578; GFX11-NEXT: s_waitcnt vmcnt(3) 1579; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32 1580; GFX11-NEXT: s_waitcnt vmcnt(2) 1581; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16 1582; GFX11-NEXT: s_waitcnt vmcnt(1) 1583; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off 1584; GFX11-NEXT: s_waitcnt vmcnt(0) 1585; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128 1586; GFX11-NEXT: s_setpc_b64 s[30:31] 1587 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef 1588 %val = load <33 x i32>, ptr addrspace(1) %ptr 1589 ret <33 x i32> %val 1590} 1591 1592define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { 1593; CI-LABEL: struct_v32i32_i32_func_void: 1594; CI: ; %bb.0: 1595; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1596; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1597; CI-NEXT: s_mov_b32 s7, 0xf000 1598; CI-NEXT: s_mov_b32 s6, -1 1599; CI-NEXT: v_add_i32_e32 v34, vcc, 0x80, v0 1600; CI-NEXT: s_waitcnt lgkmcnt(0) 1601; CI-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128 1602; CI-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 1603; CI-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 1604; CI-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 1605; CI-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 1606; CI-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 1607; CI-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 1608; CI-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 
offset:16 1609; CI-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 1610; CI-NEXT: s_waitcnt vmcnt(8) 1611; CI-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen 1612; CI-NEXT: v_add_i32_e32 v33, vcc, 0x7c, v0 1613; CI-NEXT: s_waitcnt vmcnt(8) 1614; CI-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen 1615; CI-NEXT: v_add_i32_e32 v4, vcc, 0x78, v0 1616; CI-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen 1617; CI-NEXT: v_add_i32_e32 v3, vcc, 0x74, v0 1618; CI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen 1619; CI-NEXT: v_add_i32_e32 v2, vcc, 0x70, v0 1620; CI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen 1621; CI-NEXT: v_add_i32_e32 v1, vcc, 0x6c, v0 1622; CI-NEXT: v_add_i32_e32 v2, vcc, 0x68, v0 1623; CI-NEXT: v_add_i32_e32 v3, vcc, 0x64, v0 1624; CI-NEXT: s_waitcnt vmcnt(11) 1625; CI-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen 1626; CI-NEXT: v_add_i32_e32 v1, vcc, 0x60, v0 1627; CI-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen 1628; CI-NEXT: v_add_i32_e32 v2, vcc, 0x5c, v0 1629; CI-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen 1630; CI-NEXT: v_add_i32_e32 v3, vcc, 0x58, v0 1631; CI-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen 1632; CI-NEXT: v_add_i32_e32 v1, vcc, 0x54, v0 1633; CI-NEXT: v_add_i32_e32 v4, vcc, 0x50, v0 1634; CI-NEXT: v_add_i32_e32 v5, vcc, 0x4c, v0 1635; CI-NEXT: s_waitcnt vmcnt(14) 1636; CI-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen 1637; CI-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen 1638; CI-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen 1639; CI-NEXT: v_add_i32_e32 v1, vcc, 56, v0 1640; CI-NEXT: v_add_i32_e32 v6, vcc, 0x48, v0 1641; CI-NEXT: v_add_i32_e32 v7, vcc, 0x44, v0 1642; CI-NEXT: v_add_i32_e32 v2, vcc, 64, v0 1643; CI-NEXT: v_add_i32_e32 v3, vcc, 60, v0 1644; CI-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen 1645; CI-NEXT: v_add_i32_e32 v4, vcc, 52, v0 1646; CI-NEXT: v_add_i32_e32 v8, vcc, 48, v0 1647; CI-NEXT: v_add_i32_e32 v9, vcc, 44, v0 1648; CI-NEXT: v_add_i32_e32 v10, vcc, 40, v0 1649; 
CI-NEXT: v_add_i32_e32 v11, vcc, 36, v0 1650; CI-NEXT: s_waitcnt vmcnt(14) 1651; CI-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen 1652; CI-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen 1653; CI-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen 1654; CI-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen 1655; CI-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen 1656; CI-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen 1657; CI-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen 1658; CI-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen 1659; CI-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen 1660; CI-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen 1661; CI-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen 1662; CI-NEXT: v_add_i32_e32 v1, vcc, 32, v0 1663; CI-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen 1664; CI-NEXT: v_add_i32_e32 v1, vcc, 28, v0 1665; CI-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen 1666; CI-NEXT: v_add_i32_e32 v1, vcc, 24, v0 1667; CI-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen 1668; CI-NEXT: v_add_i32_e32 v1, vcc, 20, v0 1669; CI-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen 1670; CI-NEXT: v_add_i32_e32 v1, vcc, 16, v0 1671; CI-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen 1672; CI-NEXT: v_add_i32_e32 v1, vcc, 12, v0 1673; CI-NEXT: s_waitcnt vmcnt(14) 1674; CI-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen 1675; CI-NEXT: v_add_i32_e32 v1, vcc, 8, v0 1676; CI-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen 1677; CI-NEXT: v_add_i32_e32 v1, vcc, 4, v0 1678; CI-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen 1679; CI-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen 1680; CI-NEXT: s_waitcnt vmcnt(0) 1681; CI-NEXT: s_setpc_b64 s[30:31] 1682; 1683; GFX8-LABEL: struct_v32i32_i32_func_void: 1684; GFX8: ; %bb.0: 1685; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1686; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1687; GFX8-NEXT: s_mov_b32 s7, 0xf000 1688; GFX8-NEXT: s_mov_b32 s6, -1 1689; GFX8-NEXT: 
v_add_u32_e32 v34, vcc, 0x80, v0 1690; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1691; GFX8-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128 1692; GFX8-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 1693; GFX8-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 1694; GFX8-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 1695; GFX8-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 1696; GFX8-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 1697; GFX8-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 1698; GFX8-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16 1699; GFX8-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 1700; GFX8-NEXT: s_waitcnt vmcnt(8) 1701; GFX8-NEXT: buffer_store_dword v33, v34, s[0:3], 0 offen 1702; GFX8-NEXT: v_add_u32_e32 v33, vcc, 0x7c, v0 1703; GFX8-NEXT: s_waitcnt vmcnt(8) 1704; GFX8-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen 1705; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x78, v0 1706; GFX8-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen 1707; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x74, v0 1708; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen 1709; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x70, v0 1710; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen 1711; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x6c, v0 1712; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x68, v0 1713; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x64, v0 1714; GFX8-NEXT: s_waitcnt vmcnt(11) 1715; GFX8-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen 1716; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x60, v0 1717; GFX8-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen 1718; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x5c, v0 1719; GFX8-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen 1720; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x58, v0 1721; GFX8-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen 1722; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x54, v0 1723; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x50, v0 1724; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x4c, v0 1725; 
GFX8-NEXT: s_waitcnt vmcnt(14) 1726; GFX8-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen 1727; GFX8-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen 1728; GFX8-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen 1729; GFX8-NEXT: v_add_u32_e32 v1, vcc, 56, v0 1730; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x48, v0 1731; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0x44, v0 1732; GFX8-NEXT: v_add_u32_e32 v2, vcc, 64, v0 1733; GFX8-NEXT: v_add_u32_e32 v3, vcc, 60, v0 1734; GFX8-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen 1735; GFX8-NEXT: v_add_u32_e32 v4, vcc, 52, v0 1736; GFX8-NEXT: v_add_u32_e32 v8, vcc, 48, v0 1737; GFX8-NEXT: v_add_u32_e32 v9, vcc, 44, v0 1738; GFX8-NEXT: v_add_u32_e32 v10, vcc, 40, v0 1739; GFX8-NEXT: v_add_u32_e32 v11, vcc, 36, v0 1740; GFX8-NEXT: s_waitcnt vmcnt(14) 1741; GFX8-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen 1742; GFX8-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen 1743; GFX8-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen 1744; GFX8-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen 1745; GFX8-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen 1746; GFX8-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen 1747; GFX8-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen 1748; GFX8-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen 1749; GFX8-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen 1750; GFX8-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen 1751; GFX8-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen 1752; GFX8-NEXT: v_add_u32_e32 v1, vcc, 32, v0 1753; GFX8-NEXT: buffer_store_dword v21, v1, s[0:3], 0 offen 1754; GFX8-NEXT: v_add_u32_e32 v1, vcc, 28, v0 1755; GFX8-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen 1756; GFX8-NEXT: v_add_u32_e32 v1, vcc, 24, v0 1757; GFX8-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen 1758; GFX8-NEXT: v_add_u32_e32 v1, vcc, 20, v0 1759; GFX8-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen 1760; GFX8-NEXT: v_add_u32_e32 v1, vcc, 16, v0 1761; GFX8-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen 
1762; GFX8-NEXT: v_add_u32_e32 v1, vcc, 12, v0 1763; GFX8-NEXT: s_waitcnt vmcnt(14) 1764; GFX8-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen 1765; GFX8-NEXT: v_add_u32_e32 v1, vcc, 8, v0 1766; GFX8-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen 1767; GFX8-NEXT: v_add_u32_e32 v1, vcc, 4, v0 1768; GFX8-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen 1769; GFX8-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen 1770; GFX8-NEXT: s_waitcnt vmcnt(0) 1771; GFX8-NEXT: s_setpc_b64 s[30:31] 1772; 1773; GFX9-LABEL: struct_v32i32_i32_func_void: 1774; GFX9: ; %bb.0: 1775; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1776; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1777; GFX9-NEXT: s_mov_b32 s7, 0xf000 1778; GFX9-NEXT: s_mov_b32 s6, -1 1779; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1780; GFX9-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112 1781; GFX9-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96 1782; GFX9-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80 1783; GFX9-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64 1784; GFX9-NEXT: buffer_load_dword v33, off, s[4:7], 0 offset:128 1785; GFX9-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48 1786; GFX9-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32 1787; GFX9-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16 1788; GFX9-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 1789; GFX9-NEXT: s_waitcnt vmcnt(8) 1790; GFX9-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:124 1791; GFX9-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:120 1792; GFX9-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:116 1793; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:112 1794; GFX9-NEXT: s_waitcnt vmcnt(11) 1795; GFX9-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:108 1796; GFX9-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:104 1797; GFX9-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:100 1798; 
GFX9-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:96 1799; GFX9-NEXT: s_waitcnt vmcnt(14) 1800; GFX9-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:92 1801; GFX9-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:88 1802; GFX9-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:84 1803; GFX9-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:80 1804; GFX9-NEXT: s_waitcnt vmcnt(17) 1805; GFX9-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:76 1806; GFX9-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:72 1807; GFX9-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:68 1808; GFX9-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:64 1809; GFX9-NEXT: s_waitcnt vmcnt(20) 1810; GFX9-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen offset:128 1811; GFX9-NEXT: s_waitcnt vmcnt(20) 1812; GFX9-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:60 1813; GFX9-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen offset:56 1814; GFX9-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:52 1815; GFX9-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen offset:48 1816; GFX9-NEXT: s_waitcnt vmcnt(23) 1817; GFX9-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:44 1818; GFX9-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:40 1819; GFX9-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:36 1820; GFX9-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:32 1821; GFX9-NEXT: s_waitcnt vmcnt(26) 1822; GFX9-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:28 1823; GFX9-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:24 1824; GFX9-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:20 1825; GFX9-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:16 1826; GFX9-NEXT: s_waitcnt vmcnt(29) 1827; GFX9-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:12 1828; GFX9-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:8 1829; GFX9-NEXT: buffer_store_dword v30, v0, 
s[0:3], 0 offen offset:4 1830; GFX9-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen 1831; GFX9-NEXT: s_waitcnt vmcnt(0) 1832; GFX9-NEXT: s_setpc_b64 s[30:31] 1833; 1834; GFX11-LABEL: struct_v32i32_i32_func_void: 1835; GFX11: ; %bb.0: 1836; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1837; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 1838; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1839; GFX11-NEXT: s_mov_b32 s2, -1 1840; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1841; GFX11-NEXT: s_clause 0x8 1842; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0 offset:112 1843; GFX11-NEXT: buffer_load_b128 v[5:8], off, s[0:3], 0 offset:96 1844; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80 1845; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64 1846; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48 1847; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32 1848; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16 1849; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 1850; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128 1851; GFX11-NEXT: s_waitcnt vmcnt(8) 1852; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 1853; GFX11-NEXT: s_waitcnt vmcnt(7) 1854; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96 1855; GFX11-NEXT: s_waitcnt vmcnt(6) 1856; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 1857; GFX11-NEXT: s_waitcnt vmcnt(5) 1858; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64 1859; GFX11-NEXT: s_waitcnt vmcnt(4) 1860; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48 1861; GFX11-NEXT: s_waitcnt vmcnt(3) 1862; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32 1863; GFX11-NEXT: s_waitcnt vmcnt(2) 1864; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16 1865; GFX11-NEXT: s_waitcnt vmcnt(1) 1866; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off 1867; GFX11-NEXT: s_waitcnt vmcnt(0) 1868; GFX11-NEXT: scratch_store_b32 v0, v33, off 
offset:128 1869; GFX11-NEXT: s_setpc_b64 s[30:31] 1870 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef 1871 %val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr 1872 ret { <32 x i32>, i32 }%val 1873} 1874 1875define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { 1876; CI-LABEL: struct_i32_v32i32_func_void: 1877; CI: ; %bb.0: 1878; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1879; CI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1880; CI-NEXT: s_mov_b32 s7, 0xf000 1881; CI-NEXT: s_mov_b32 s6, -1 1882; CI-NEXT: s_waitcnt lgkmcnt(0) 1883; CI-NEXT: buffer_load_dword v33, off, s[4:7], 0 1884; CI-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240 1885; CI-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224 1886; CI-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208 1887; CI-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192 1888; CI-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176 1889; CI-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160 1890; CI-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144 1891; CI-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128 1892; CI-NEXT: s_waitcnt vmcnt(8) 1893; CI-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen 1894; CI-NEXT: v_add_i32_e32 v33, vcc, 0xfc, v0 1895; CI-NEXT: s_waitcnt vmcnt(8) 1896; CI-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen 1897; CI-NEXT: v_add_i32_e32 v4, vcc, 0xf8, v0 1898; CI-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen 1899; CI-NEXT: v_add_i32_e32 v3, vcc, 0xf4, v0 1900; CI-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen 1901; CI-NEXT: v_add_i32_e32 v2, vcc, 0xf0, v0 1902; CI-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen 1903; CI-NEXT: v_add_i32_e32 v1, vcc, 0xec, v0 1904; CI-NEXT: v_add_i32_e32 v2, vcc, 0xe8, v0 1905; CI-NEXT: v_add_i32_e32 v3, vcc, 0xe4, v0 1906; CI-NEXT: s_waitcnt vmcnt(11) 1907; CI-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen 1908; CI-NEXT: v_add_i32_e32 v1, vcc, 
0xe0, v0 1909; CI-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen 1910; CI-NEXT: v_add_i32_e32 v2, vcc, 0xdc, v0 1911; CI-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen 1912; CI-NEXT: v_add_i32_e32 v3, vcc, 0xd8, v0 1913; CI-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen 1914; CI-NEXT: v_add_i32_e32 v1, vcc, 0xd4, v0 1915; CI-NEXT: v_add_i32_e32 v4, vcc, 0xd0, v0 1916; CI-NEXT: v_add_i32_e32 v5, vcc, 0xcc, v0 1917; CI-NEXT: v_add_i32_e32 v6, vcc, 0xc8, v0 1918; CI-NEXT: s_waitcnt vmcnt(14) 1919; CI-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen 1920; CI-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen 1921; CI-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen 1922; CI-NEXT: v_add_i32_e32 v1, vcc, 0xb8, v0 1923; CI-NEXT: v_add_i32_e32 v7, vcc, 0xc4, v0 1924; CI-NEXT: v_add_i32_e32 v2, vcc, 0xc0, v0 1925; CI-NEXT: v_add_i32_e32 v3, vcc, 0xbc, v0 1926; CI-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen 1927; CI-NEXT: v_add_i32_e32 v4, vcc, 0xb4, v0 1928; CI-NEXT: v_add_i32_e32 v8, vcc, 0xb0, v0 1929; CI-NEXT: v_add_i32_e32 v9, vcc, 0xac, v0 1930; CI-NEXT: v_add_i32_e32 v10, vcc, 0xa8, v0 1931; CI-NEXT: v_add_i32_e32 v11, vcc, 0xa4, v0 1932; CI-NEXT: s_waitcnt vmcnt(14) 1933; CI-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen 1934; CI-NEXT: v_add_i32_e32 v5, vcc, 0xa0, v0 1935; CI-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen 1936; CI-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen 1937; CI-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen 1938; CI-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen 1939; CI-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen 1940; CI-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen 1941; CI-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen 1942; CI-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen 1943; CI-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen 1944; CI-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen 1945; CI-NEXT: buffer_store_dword v21, v5, s[0:3], 0 offen 1946; CI-NEXT: v_add_i32_e32 v1, vcc, 0x9c, v0 
1947; CI-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen 1948; CI-NEXT: v_add_i32_e32 v1, vcc, 0x98, v0 1949; CI-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen 1950; CI-NEXT: v_add_i32_e32 v1, vcc, 0x94, v0 1951; CI-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen 1952; CI-NEXT: v_add_i32_e32 v1, vcc, 0x90, v0 1953; CI-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen 1954; CI-NEXT: v_add_i32_e32 v1, vcc, 0x8c, v0 1955; CI-NEXT: s_waitcnt vmcnt(14) 1956; CI-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen 1957; CI-NEXT: v_add_i32_e32 v1, vcc, 0x88, v0 1958; CI-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen 1959; CI-NEXT: v_add_i32_e32 v1, vcc, 0x84, v0 1960; CI-NEXT: v_add_i32_e32 v0, vcc, 0x80, v0 1961; CI-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen 1962; CI-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen 1963; CI-NEXT: s_waitcnt vmcnt(0) 1964; CI-NEXT: s_setpc_b64 s[30:31] 1965; 1966; GFX8-LABEL: struct_i32_v32i32_func_void: 1967; GFX8: ; %bb.0: 1968; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1969; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 1970; GFX8-NEXT: s_mov_b32 s7, 0xf000 1971; GFX8-NEXT: s_mov_b32 s6, -1 1972; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1973; GFX8-NEXT: buffer_load_dword v33, off, s[4:7], 0 1974; GFX8-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240 1975; GFX8-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224 1976; GFX8-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208 1977; GFX8-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192 1978; GFX8-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176 1979; GFX8-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160 1980; GFX8-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144 1981; GFX8-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128 1982; GFX8-NEXT: s_waitcnt vmcnt(8) 1983; GFX8-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen 1984; GFX8-NEXT: v_add_u32_e32 v33, vcc, 0xfc, v0 1985; GFX8-NEXT: 
s_waitcnt vmcnt(8) 1986; GFX8-NEXT: buffer_store_dword v4, v33, s[0:3], 0 offen 1987; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xf8, v0 1988; GFX8-NEXT: buffer_store_dword v3, v4, s[0:3], 0 offen 1989; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xf4, v0 1990; GFX8-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen 1991; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xf0, v0 1992; GFX8-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen 1993; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xec, v0 1994; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xe8, v0 1995; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xe4, v0 1996; GFX8-NEXT: s_waitcnt vmcnt(11) 1997; GFX8-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen 1998; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xe0, v0 1999; GFX8-NEXT: buffer_store_dword v7, v2, s[0:3], 0 offen 2000; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xdc, v0 2001; GFX8-NEXT: buffer_store_dword v6, v3, s[0:3], 0 offen 2002; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xd8, v0 2003; GFX8-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen 2004; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xd4, v0 2005; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xd0, v0 2006; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0xcc, v0 2007; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0xc8, v0 2008; GFX8-NEXT: s_waitcnt vmcnt(14) 2009; GFX8-NEXT: buffer_store_dword v12, v2, s[0:3], 0 offen 2010; GFX8-NEXT: buffer_store_dword v11, v3, s[0:3], 0 offen 2011; GFX8-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen 2012; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0xb8, v0 2013; GFX8-NEXT: v_add_u32_e32 v7, vcc, 0xc4, v0 2014; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xc0, v0 2015; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xbc, v0 2016; GFX8-NEXT: buffer_store_dword v9, v4, s[0:3], 0 offen 2017; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xb4, v0 2018; GFX8-NEXT: v_add_u32_e32 v8, vcc, 0xb0, v0 2019; GFX8-NEXT: v_add_u32_e32 v9, vcc, 0xac, v0 2020; GFX8-NEXT: v_add_u32_e32 v10, vcc, 0xa8, v0 2021; GFX8-NEXT: v_add_u32_e32 v11, vcc, 0xa4, v0 2022; GFX8-NEXT: s_waitcnt vmcnt(14) 2023; GFX8-NEXT: buffer_store_dword v16, v5, s[0:3], 0 offen 
2024; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0xa0, v0 2025; GFX8-NEXT: buffer_store_dword v15, v6, s[0:3], 0 offen 2026; GFX8-NEXT: buffer_store_dword v14, v7, s[0:3], 0 offen 2027; GFX8-NEXT: buffer_store_dword v13, v2, s[0:3], 0 offen 2028; GFX8-NEXT: buffer_store_dword v20, v3, s[0:3], 0 offen 2029; GFX8-NEXT: buffer_store_dword v19, v1, s[0:3], 0 offen 2030; GFX8-NEXT: buffer_store_dword v18, v4, s[0:3], 0 offen 2031; GFX8-NEXT: buffer_store_dword v17, v8, s[0:3], 0 offen 2032; GFX8-NEXT: buffer_store_dword v24, v9, s[0:3], 0 offen 2033; GFX8-NEXT: buffer_store_dword v23, v10, s[0:3], 0 offen 2034; GFX8-NEXT: buffer_store_dword v22, v11, s[0:3], 0 offen 2035; GFX8-NEXT: buffer_store_dword v21, v5, s[0:3], 0 offen 2036; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x9c, v0 2037; GFX8-NEXT: buffer_store_dword v28, v1, s[0:3], 0 offen 2038; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x98, v0 2039; GFX8-NEXT: buffer_store_dword v27, v1, s[0:3], 0 offen 2040; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x94, v0 2041; GFX8-NEXT: buffer_store_dword v26, v1, s[0:3], 0 offen 2042; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x90, v0 2043; GFX8-NEXT: buffer_store_dword v25, v1, s[0:3], 0 offen 2044; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x8c, v0 2045; GFX8-NEXT: s_waitcnt vmcnt(14) 2046; GFX8-NEXT: buffer_store_dword v32, v1, s[0:3], 0 offen 2047; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x88, v0 2048; GFX8-NEXT: buffer_store_dword v31, v1, s[0:3], 0 offen 2049; GFX8-NEXT: v_add_u32_e32 v1, vcc, 0x84, v0 2050; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x80, v0 2051; GFX8-NEXT: buffer_store_dword v30, v1, s[0:3], 0 offen 2052; GFX8-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen 2053; GFX8-NEXT: s_waitcnt vmcnt(0) 2054; GFX8-NEXT: s_setpc_b64 s[30:31] 2055; 2056; GFX9-LABEL: struct_i32_v32i32_func_void: 2057; GFX9: ; %bb.0: 2058; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2059; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 2060; GFX9-NEXT: s_mov_b32 s7, 0xf000 2061; GFX9-NEXT: s_mov_b32 s6, -1 2062; GFX9-NEXT: s_waitcnt 
lgkmcnt(0) 2063; GFX9-NEXT: buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240 2064; GFX9-NEXT: buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224 2065; GFX9-NEXT: buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208 2066; GFX9-NEXT: buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192 2067; GFX9-NEXT: buffer_load_dword v33, off, s[4:7], 0 2068; GFX9-NEXT: buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176 2069; GFX9-NEXT: buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160 2070; GFX9-NEXT: buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144 2071; GFX9-NEXT: buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128 2072; GFX9-NEXT: s_waitcnt vmcnt(8) 2073; GFX9-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:252 2074; GFX9-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:248 2075; GFX9-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:244 2076; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:240 2077; GFX9-NEXT: s_waitcnt vmcnt(11) 2078; GFX9-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen offset:236 2079; GFX9-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen offset:232 2080; GFX9-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen offset:228 2081; GFX9-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen offset:224 2082; GFX9-NEXT: s_waitcnt vmcnt(14) 2083; GFX9-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen offset:220 2084; GFX9-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen offset:216 2085; GFX9-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen offset:212 2086; GFX9-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen offset:208 2087; GFX9-NEXT: s_waitcnt vmcnt(17) 2088; GFX9-NEXT: buffer_store_dword v16, v0, s[0:3], 0 offen offset:204 2089; GFX9-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen offset:200 2090; GFX9-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen offset:196 2091; GFX9-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen offset:192 2092; GFX9-NEXT: s_waitcnt vmcnt(20) 2093; GFX9-NEXT: 
buffer_store_dword v33, v0, s[0:3], 0 offen 2094; GFX9-NEXT: s_waitcnt vmcnt(20) 2095; GFX9-NEXT: buffer_store_dword v20, v0, s[0:3], 0 offen offset:188 2096; GFX9-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen offset:184 2097; GFX9-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen offset:180 2098; GFX9-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen offset:176 2099; GFX9-NEXT: s_waitcnt vmcnt(23) 2100; GFX9-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen offset:172 2101; GFX9-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen offset:168 2102; GFX9-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen offset:164 2103; GFX9-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen offset:160 2104; GFX9-NEXT: s_waitcnt vmcnt(26) 2105; GFX9-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen offset:156 2106; GFX9-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen offset:152 2107; GFX9-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen offset:148 2108; GFX9-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen offset:144 2109; GFX9-NEXT: s_waitcnt vmcnt(29) 2110; GFX9-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen offset:140 2111; GFX9-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen offset:136 2112; GFX9-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen offset:132 2113; GFX9-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen offset:128 2114; GFX9-NEXT: s_waitcnt vmcnt(0) 2115; GFX9-NEXT: s_setpc_b64 s[30:31] 2116; 2117; GFX11-LABEL: struct_i32_v32i32_func_void: 2118; GFX11: ; %bb.0: 2119; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2120; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 2121; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2122; GFX11-NEXT: s_mov_b32 s2, -1 2123; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2124; GFX11-NEXT: s_clause 0x8 2125; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0 offset:240 2126; GFX11-NEXT: buffer_load_b128 v[5:8], off, s[0:3], 0 offset:224 2127; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:208 2128; GFX11-NEXT: buffer_load_b128 
v[13:16], off, s[0:3], 0 offset:192 2129; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:176 2130; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:160 2131; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:144 2132; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:128 2133; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 2134; GFX11-NEXT: s_waitcnt vmcnt(8) 2135; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240 2136; GFX11-NEXT: s_waitcnt vmcnt(7) 2137; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:224 2138; GFX11-NEXT: s_waitcnt vmcnt(6) 2139; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:208 2140; GFX11-NEXT: s_waitcnt vmcnt(5) 2141; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:192 2142; GFX11-NEXT: s_waitcnt vmcnt(4) 2143; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:176 2144; GFX11-NEXT: s_waitcnt vmcnt(3) 2145; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:160 2146; GFX11-NEXT: s_waitcnt vmcnt(2) 2147; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:144 2148; GFX11-NEXT: s_waitcnt vmcnt(1) 2149; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:128 2150; GFX11-NEXT: s_waitcnt vmcnt(0) 2151; GFX11-NEXT: scratch_store_b32 v0, v33, off 2152; GFX11-NEXT: s_setpc_b64 s[30:31] 2153 %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef 2154 %val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr 2155 ret { i32, <32 x i32> }%val 2156} 2157 2158; Make sure the last struct component is returned in v3, not v4. 
; Test: return a { <3 x i32>, i32 } struct assembled from four volatile i32
; loads through an undef addrspace(3) pointer. On every run line the generated
; assertions expect the four loads to target v0, v1, v2 and v3, i.e. no
; register is skipped between the 3-element vector and the trailing i32.
define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
; CI-LABEL: v3i32_struct_func_void_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_read_b32 v0, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: ds_read_b32 v1, v0
; CI-NEXT: ds_read_b32 v2, v0
; CI-NEXT: ds_read_b32 v3, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v3i32_struct_func_void_wasted_reg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_read_b32 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ds_read_b32 v1, v0
; GFX8-NEXT: ds_read_b32 v2, v0
; GFX8-NEXT: ds_read_b32 v3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v3i32_struct_func_void_wasted_reg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_read_b32 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_read_b32 v1, v0
; GFX9-NEXT: ds_read_b32 v2, v0
; GFX9-NEXT: ds_read_b32 v3, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3i32_struct_func_void_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_load_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ds_load_b32 v1, v0
; GFX11-NEXT: ds_load_b32 v2, v0
; GFX11-NEXT: ds_load_b32 v3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  ; Four separate volatile loads; each one shows up as its own LDS read in the
  ; assertions above.
  %load0 = load volatile i32, ptr addrspace(3) undef
  %load1 = load volatile i32, ptr addrspace(3) undef
  %load2 = load volatile i32, ptr addrspace(3) undef
  %load3 = load volatile i32, ptr addrspace(3) undef

  ; Pack the first three values into the vector, then wrap the vector and the
  ; fourth value into the returned struct.
  %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
  %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
  %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
  %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
  %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x i32>, i32 } %insert.4
}

; Test: return a { <3 x float>, i32 } struct. Three volatile float loads fill
; the <3 x float>, and a fourth volatile i32 load supplies the second struct
; field. On every run line the generated assertions expect the four loads to
; target v0, v1, v2 and v3.
define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
; CI-LABEL: v3f32_struct_func_void_wasted_reg:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_read_b32 v0, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: ds_read_b32 v1, v0
; CI-NEXT: ds_read_b32 v2, v0
; CI-NEXT: ds_read_b32 v3, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v3f32_struct_func_void_wasted_reg:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_read_b32 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ds_read_b32 v1, v0
; GFX8-NEXT: ds_read_b32 v2, v0
; GFX8-NEXT: ds_read_b32 v3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v3f32_struct_func_void_wasted_reg:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_read_b32 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ds_read_b32 v1, v0
; GFX9-NEXT: ds_read_b32 v2, v0
; GFX9-NEXT: ds_read_b32 v3, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v3f32_struct_func_void_wasted_reg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_load_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ds_load_b32 v1, v0
; GFX11-NEXT: ds_load_b32 v2, v0
; GFX11-NEXT: ds_load_b32 v3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  ; Three float loads for the vector lanes and one i32 load for the scalar
  ; struct field, all volatile.
  %load0 = load volatile float, ptr addrspace(3) undef
  %load1 = load volatile float, ptr addrspace(3) undef
  %load2 = load volatile float, ptr addrspace(3) undef
  %load3 = load volatile i32, ptr addrspace(3) undef

  ; Build the <3 x float>, then the struct that carries it plus the i32.
  %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
  %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
  %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
  %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
  %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x float>, i32 } %insert.4
}

; Test: the sret pointer argument (addrspace(5)) is converted to an i32 and
; shifted right by 16, 17 and 18; each result is written with a volatile store
; through an undef addrspace(3) pointer. Per the generated assertions, the
; hawaii, fiji and gfx900 runs store the 16-bit shift followed by two stores
; of constant 0, while the gfx1100 run stores the 16-bit shift, the 17-bit
; shift and then a single constant 0.
; NOTE(review): presumably this pins how many high bits of the sret pointer
; the backend treats as known zero (going by the function name) - confirm.
define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) #0 {
; CI-LABEL: void_func_sret_max_known_zero_bits:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; CI-NEXT: s_mov_b32 m0, -1
; CI-NEXT: ds_write_b32 v0, v0
; CI-NEXT: v_mov_b32_e32 v0, 0
; CI-NEXT: ds_write_b32 v0, v0
; CI-NEXT: ds_write_b32 v0, v0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: void_func_sret_max_known_zero_bits:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_write_b32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: ds_write_b32 v0, v0
; GFX8-NEXT: ds_write_b32 v0, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_sret_max_known_zero_bits:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: ds_write_b32 v0, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_sret_max_known_zero_bits:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT: v_lshrrev_b32_e32 v0, 17, v0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: ds_store_b32 v0, v1
; GFX11-NEXT: ds_store_b32 v0, v0
; GFX11-NEXT: ds_store_b32 v0, v2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  ; Reinterpret the sret pointer as a plain 32-bit integer.
  %arg0.int = ptrtoint ptr addrspace(5) %arg0 to i32

  ; Three shift amounts, one bit apart.
  %lshr0 = lshr i32 %arg0.int, 16
  %lshr1 = lshr i32 %arg0.int, 17
  %lshr2 = lshr i32 %arg0.int, 18

  ; Volatile stores, so every shifted value has a corresponding store in the
  ; assertions above.
  store volatile i32 %lshr0, ptr addrspace(3) undef
  store volatile i32 %lshr1, ptr addrspace(3) undef
  store volatile i32 %lshr2, ptr addrspace(3) undef
  ret void
}

define bfloat @bf16_func_void() #0 {
; CI-LABEL: bf16_func_void:
; CI: ; %bb.0:
; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT: s_mov_b32 s7, 0xf000
; CI-NEXT: s_mov_b32 s6, -1
; CI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; CI-NEXT: s_waitcnt vmcnt(0)
; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; CI-NEXT: s_setpc_b64 s[30:31]
;
; GFX89-LABEL: bf16_func_void:
; GFX89: ; %bb.0:
; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT: s_mov_b32 s7, 0xf000
; GFX89-NEXT: s_mov_b32 s6, -1
; GFX89-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; GFX89-NEXT: s_waitcnt vmcnt(0)
; GFX89-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: bf16_func_void:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %val = load bfloat, ptr addrspace(1) undef
  ret
bfloat %val 2366} 2367 2368define <2 x bfloat> @v2bf16_func_void() #0 { 2369; CI-LABEL: v2bf16_func_void: 2370; CI: ; %bb.0: 2371; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2372; CI-NEXT: s_mov_b32 s7, 0xf000 2373; CI-NEXT: s_mov_b32 s6, -1 2374; CI-NEXT: buffer_load_dword v1, off, s[4:7], 0 2375; CI-NEXT: s_waitcnt vmcnt(0) 2376; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2377; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 2378; CI-NEXT: s_setpc_b64 s[30:31] 2379; 2380; GFX89-LABEL: v2bf16_func_void: 2381; GFX89: ; %bb.0: 2382; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2383; GFX89-NEXT: s_mov_b32 s7, 0xf000 2384; GFX89-NEXT: s_mov_b32 s6, -1 2385; GFX89-NEXT: buffer_load_dword v0, off, s[4:7], 0 2386; GFX89-NEXT: s_waitcnt vmcnt(0) 2387; GFX89-NEXT: s_setpc_b64 s[30:31] 2388; 2389; GFX11-LABEL: v2bf16_func_void: 2390; GFX11: ; %bb.0: 2391; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2392; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2393; GFX11-NEXT: s_mov_b32 s2, -1 2394; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 2395; GFX11-NEXT: s_waitcnt vmcnt(0) 2396; GFX11-NEXT: s_setpc_b64 s[30:31] 2397 %val = load <2 x bfloat>, ptr addrspace(1) undef 2398 ret <2 x bfloat> %val 2399} 2400 2401define <3 x bfloat> @v3bf16_func_void() #0 { 2402; CI-LABEL: v3bf16_func_void: 2403; CI: ; %bb.0: 2404; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2405; CI-NEXT: s_mov_b32 s7, 0xf000 2406; CI-NEXT: s_mov_b32 s6, -1 2407; CI-NEXT: buffer_load_dwordx2 v[1:2], off, s[4:7], 0 2408; CI-NEXT: s_waitcnt vmcnt(0) 2409; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2410; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v1 2411; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2412; CI-NEXT: s_setpc_b64 s[30:31] 2413; 2414; GFX89-LABEL: v3bf16_func_void: 2415; GFX89: ; %bb.0: 2416; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2417; GFX89-NEXT: s_mov_b32 s7, 0xf000 2418; GFX89-NEXT: s_mov_b32 s6, -1 2419; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 2420; GFX89-NEXT: s_waitcnt vmcnt(0) 
2421; GFX89-NEXT: s_setpc_b64 s[30:31] 2422; 2423; GFX11-LABEL: v3bf16_func_void: 2424; GFX11: ; %bb.0: 2425; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2426; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2427; GFX11-NEXT: s_mov_b32 s2, -1 2428; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 2429; GFX11-NEXT: s_waitcnt vmcnt(0) 2430; GFX11-NEXT: s_setpc_b64 s[30:31] 2431 %val = load <3 x bfloat>, ptr addrspace(1) undef 2432 ret <3 x bfloat> %val 2433} 2434 2435define <4 x bfloat> @v4bf16_func_void() #0 { 2436; CI-LABEL: v4bf16_func_void: 2437; CI: ; %bb.0: 2438; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2439; CI-NEXT: s_mov_b32 s7, 0xf000 2440; CI-NEXT: s_mov_b32 s6, -1 2441; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0 2442; CI-NEXT: s_waitcnt vmcnt(0) 2443; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v2 2444; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v2 2445; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v3 2446; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v3 2447; CI-NEXT: s_setpc_b64 s[30:31] 2448; 2449; GFX89-LABEL: v4bf16_func_void: 2450; GFX89: ; %bb.0: 2451; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2452; GFX89-NEXT: s_mov_b32 s7, 0xf000 2453; GFX89-NEXT: s_mov_b32 s6, -1 2454; GFX89-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 2455; GFX89-NEXT: s_waitcnt vmcnt(0) 2456; GFX89-NEXT: s_setpc_b64 s[30:31] 2457; 2458; GFX11-LABEL: v4bf16_func_void: 2459; GFX11: ; %bb.0: 2460; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2461; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2462; GFX11-NEXT: s_mov_b32 s2, -1 2463; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 2464; GFX11-NEXT: s_waitcnt vmcnt(0) 2465; GFX11-NEXT: s_setpc_b64 s[30:31] 2466 %val = load <4 x bfloat>, ptr addrspace(1) undef 2467 ret <4 x bfloat> %val 2468} 2469 2470define <6 x bfloat> @v6bf16_func_void() #0 { 2471; CI-LABEL: v6bf16_func_void: 2472; CI: ; %bb.0: 2473; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2474; CI-NEXT: s_mov_b32 s7, 0xf000 2475; CI-NEXT: s_mov_b32 s6, -1 2476; 
CI-NEXT: buffer_load_dwordx3 v[3:5], off, s[4:7], 0 2477; CI-NEXT: s_waitcnt vmcnt(0) 2478; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v3 2479; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v3 2480; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v4 2481; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v4 2482; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v5 2483; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v5 2484; CI-NEXT: s_setpc_b64 s[30:31] 2485; 2486; GFX89-LABEL: v6bf16_func_void: 2487; GFX89: ; %bb.0: 2488; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2489; GFX89-NEXT: s_mov_b32 s7, 0xf000 2490; GFX89-NEXT: s_mov_b32 s6, -1 2491; GFX89-NEXT: buffer_load_dwordx3 v[0:2], off, s[4:7], 0 2492; GFX89-NEXT: s_waitcnt vmcnt(0) 2493; GFX89-NEXT: s_setpc_b64 s[30:31] 2494; 2495; GFX11-LABEL: v6bf16_func_void: 2496; GFX11: ; %bb.0: 2497; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2498; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2499; GFX11-NEXT: s_mov_b32 s2, -1 2500; GFX11-NEXT: buffer_load_b96 v[0:2], off, s[0:3], 0 2501; GFX11-NEXT: s_waitcnt vmcnt(0) 2502; GFX11-NEXT: s_setpc_b64 s[30:31] 2503 %val = load <6 x bfloat>, ptr addrspace(1) undef 2504 ret <6 x bfloat> %val 2505} 2506 2507define <8 x bfloat> @v8bf16_func_void() #0 { 2508; CI-LABEL: v8bf16_func_void: 2509; CI: ; %bb.0: 2510; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2511; CI-NEXT: s_mov_b32 s7, 0xf000 2512; CI-NEXT: s_mov_b32 s6, -1 2513; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 2514; CI-NEXT: s_waitcnt vmcnt(0) 2515; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v4 2516; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v4 2517; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v5 2518; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5 2519; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v6 2520; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v6 2521; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v7 2522; CI-NEXT: v_and_b32_e32 v7, 0xffff0000, v7 2523; CI-NEXT: s_setpc_b64 s[30:31] 2524; 2525; GFX89-LABEL: v8bf16_func_void: 2526; GFX89: ; %bb.0: 2527; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 2528; GFX89-NEXT: s_mov_b32 s7, 0xf000 2529; GFX89-NEXT: s_mov_b32 s6, -1 2530; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 2531; GFX89-NEXT: s_waitcnt vmcnt(0) 2532; GFX89-NEXT: s_setpc_b64 s[30:31] 2533; 2534; GFX11-LABEL: v8bf16_func_void: 2535; GFX11: ; %bb.0: 2536; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2537; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2538; GFX11-NEXT: s_mov_b32 s2, -1 2539; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 2540; GFX11-NEXT: s_waitcnt vmcnt(0) 2541; GFX11-NEXT: s_setpc_b64 s[30:31] 2542 %val = load <8 x bfloat>, ptr addrspace(1) undef 2543 ret <8 x bfloat> %val 2544} 2545 2546define <16 x bfloat> @v16bf16_func_void() #0 { 2547; CI-LABEL: v16bf16_func_void: 2548; CI: ; %bb.0: 2549; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2550; CI-NEXT: s_mov_b32 s7, 0xf000 2551; CI-NEXT: s_mov_b32 s6, -1 2552; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 2553; CI-NEXT: s_waitcnt vmcnt(0) 2554; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v4 2555; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v4 2556; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v5 2557; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5 2558; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v6 2559; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v6 2560; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v7 2561; CI-NEXT: v_and_b32_e32 v7, 0xffff0000, v7 2562; CI-NEXT: v_mov_b32_e32 v8, v0 2563; CI-NEXT: v_mov_b32_e32 v9, v1 2564; CI-NEXT: v_mov_b32_e32 v10, v2 2565; CI-NEXT: v_mov_b32_e32 v11, v3 2566; CI-NEXT: v_mov_b32_e32 v12, v4 2567; CI-NEXT: v_mov_b32_e32 v13, v5 2568; CI-NEXT: v_mov_b32_e32 v14, v6 2569; CI-NEXT: v_mov_b32_e32 v15, v7 2570; CI-NEXT: s_setpc_b64 s[30:31] 2571; 2572; GFX89-LABEL: v16bf16_func_void: 2573; GFX89: ; %bb.0: 2574; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2575; GFX89-NEXT: s_mov_b32 s7, 0xf000 2576; GFX89-NEXT: s_mov_b32 s6, -1 2577; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 2578; GFX89-NEXT: s_waitcnt vmcnt(0) 2579; GFX89-NEXT: v_mov_b32_e32 v4, 
v0 2580; GFX89-NEXT: v_mov_b32_e32 v5, v1 2581; GFX89-NEXT: v_mov_b32_e32 v6, v2 2582; GFX89-NEXT: v_mov_b32_e32 v7, v3 2583; GFX89-NEXT: s_setpc_b64 s[30:31] 2584; 2585; GFX11-LABEL: v16bf16_func_void: 2586; GFX11: ; %bb.0: 2587; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2588; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2589; GFX11-NEXT: s_mov_b32 s2, -1 2590; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 2591; GFX11-NEXT: s_waitcnt vmcnt(0) 2592; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1 2593; GFX11-NEXT: v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, v3 2594; GFX11-NEXT: s_setpc_b64 s[30:31] 2595 %val = load <16 x bfloat>, ptr addrspace(1) undef 2596 ret <16 x bfloat> %val 2597} 2598 2599define <32 x bfloat> @v32bf16_func_void() #0 { 2600; CI-LABEL: v32bf16_func_void: 2601; CI: ; %bb.0: 2602; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2603; CI-NEXT: s_mov_b32 s7, 0xf000 2604; CI-NEXT: s_mov_b32 s6, -1 2605; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 2606; CI-NEXT: s_waitcnt vmcnt(0) 2607; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v4 2608; CI-NEXT: v_and_b32_e32 v1, 0xffff0000, v4 2609; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v5 2610; CI-NEXT: v_and_b32_e32 v3, 0xffff0000, v5 2611; CI-NEXT: v_lshlrev_b32_e32 v4, 16, v6 2612; CI-NEXT: v_and_b32_e32 v5, 0xffff0000, v6 2613; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v7 2614; CI-NEXT: v_and_b32_e32 v7, 0xffff0000, v7 2615; CI-NEXT: v_mov_b32_e32 v8, v0 2616; CI-NEXT: v_mov_b32_e32 v9, v1 2617; CI-NEXT: v_mov_b32_e32 v10, v2 2618; CI-NEXT: v_mov_b32_e32 v11, v3 2619; CI-NEXT: v_mov_b32_e32 v16, v0 2620; CI-NEXT: v_mov_b32_e32 v17, v1 2621; CI-NEXT: v_mov_b32_e32 v18, v2 2622; CI-NEXT: v_mov_b32_e32 v19, v3 2623; CI-NEXT: v_mov_b32_e32 v24, v0 2624; CI-NEXT: v_mov_b32_e32 v25, v1 2625; CI-NEXT: v_mov_b32_e32 v26, v2 2626; CI-NEXT: v_mov_b32_e32 v27, v3 2627; CI-NEXT: v_mov_b32_e32 v12, v4 2628; CI-NEXT: v_mov_b32_e32 v20, v4 2629; CI-NEXT: v_mov_b32_e32 v28, v4 2630; CI-NEXT: v_mov_b32_e32 v13, v5 
2631; CI-NEXT: v_mov_b32_e32 v21, v5 2632; CI-NEXT: v_mov_b32_e32 v29, v5 2633; CI-NEXT: v_mov_b32_e32 v14, v6 2634; CI-NEXT: v_mov_b32_e32 v22, v6 2635; CI-NEXT: v_mov_b32_e32 v30, v6 2636; CI-NEXT: v_mov_b32_e32 v15, v7 2637; CI-NEXT: v_mov_b32_e32 v23, v7 2638; CI-NEXT: v_mov_b32_e32 v31, v7 2639; CI-NEXT: s_setpc_b64 s[30:31] 2640; 2641; GFX89-LABEL: v32bf16_func_void: 2642; GFX89: ; %bb.0: 2643; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2644; GFX89-NEXT: s_mov_b32 s7, 0xf000 2645; GFX89-NEXT: s_mov_b32 s6, -1 2646; GFX89-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 2647; GFX89-NEXT: s_waitcnt vmcnt(0) 2648; GFX89-NEXT: v_mov_b32_e32 v4, v0 2649; GFX89-NEXT: v_mov_b32_e32 v5, v1 2650; GFX89-NEXT: v_mov_b32_e32 v6, v2 2651; GFX89-NEXT: v_mov_b32_e32 v7, v3 2652; GFX89-NEXT: v_mov_b32_e32 v8, v0 2653; GFX89-NEXT: v_mov_b32_e32 v9, v1 2654; GFX89-NEXT: v_mov_b32_e32 v10, v2 2655; GFX89-NEXT: v_mov_b32_e32 v11, v3 2656; GFX89-NEXT: v_mov_b32_e32 v12, v0 2657; GFX89-NEXT: v_mov_b32_e32 v13, v1 2658; GFX89-NEXT: v_mov_b32_e32 v14, v2 2659; GFX89-NEXT: v_mov_b32_e32 v15, v3 2660; GFX89-NEXT: s_setpc_b64 s[30:31] 2661; 2662; GFX11-LABEL: v32bf16_func_void: 2663; GFX11: ; %bb.0: 2664; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2665; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2666; GFX11-NEXT: s_mov_b32 s2, -1 2667; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 2668; GFX11-NEXT: s_waitcnt vmcnt(0) 2669; GFX11-NEXT: v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1 2670; GFX11-NEXT: v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, v3 2671; GFX11-NEXT: v_dual_mov_b32 v8, v0 :: v_dual_mov_b32 v9, v1 2672; GFX11-NEXT: v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v3 2673; GFX11-NEXT: v_dual_mov_b32 v12, v0 :: v_dual_mov_b32 v13, v1 2674; GFX11-NEXT: v_dual_mov_b32 v14, v2 :: v_dual_mov_b32 v15, v3 2675; GFX11-NEXT: s_setpc_b64 s[30:31] 2676 %val = load <32 x bfloat>, ptr addrspace(1) undef 2677 ret <32 x bfloat> %val 2678} 2679 2680attributes #0 = { nounwind } 
2681