; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,CI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s

; Every function in this file takes a single argument and either stores it
; (or a value derived from it) through a global address-space pointer or
; branches on it, so the generated checks record how that argument reaches
; the callee on each of the four targets exercised above.

; Scalar i1 arguments, with and without an extension attribute.
define void @void_func_i1(i1 %arg0) #0 {
; CIGFX89-LABEL: void_func_i1:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    v_and_b32_e32 v0, 1, v0
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i1 %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
; CIGFX89-LABEL: void_func_i1_zeroext:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    v_or_b32_e32 v0, 12, v0
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_zeroext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_or_b32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = zext i1 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

define void @void_func_i1_signext(i1 signext %arg0) #0 {
; CI-LABEL: void_func_i1_signext:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i1_signext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i1_signext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_signext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = sext i1 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

; The i1 argument is consumed directly as a branch condition here.
define void @i1_arg_i1_use(i1 %arg) #0 {
; CIGFX89-LABEL: i1_arg_i1_use:
; CIGFX89:       ; %bb.0: ; %bb
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    v_and_b32_e32 v0, 1, v0
; CIGFX89-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
; CIGFX89-NEXT:    s_xor_b64 s[6:7], vcc, -1
; CIGFX89-NEXT:    s_and_saveexec_b64 s[4:5], s[6:7]
; CIGFX89-NEXT:    s_cbranch_execz .LBB3_2
; CIGFX89-NEXT:  ; %bb.1: ; %bb1
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    v_mov_b32_e32 v0, 0
; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:  .LBB3_2: ; %bb2
; CIGFX89-NEXT:    s_or_b64 exec, exec, s[4:5]
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: i1_arg_i1_use:
; GFX11:       ; %bb.0: ; %bb
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT:    s_xor_b32 s1, vcc_lo, -1
; GFX11-NEXT:    s_and_saveexec_b32 s0, s1
; GFX11-NEXT:    s_cbranch_execz .LBB3_2
; GFX11-NEXT:  ; %bb.1: ; %bb1
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0 dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:  .LBB3_2: ; %bb2
; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
bb:
  br i1 %arg, label %bb2, label %bb1

bb1:
  store volatile i32 0, ptr addrspace(1) undef
  br label %bb2

bb2:
  ret void
}

; Scalar i8 and i16 arguments, with and without an extension attribute.
define void @void_func_i8(i8 %arg0) #0 {
; CIGFX89-LABEL: void_func_i8:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_byte v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i8 %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
; CI-LABEL: void_func_i8_zeroext:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i8_zeroext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i8_zeroext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_zeroext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = zext i8 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

define void @void_func_i8_signext(i8 signext %arg0) #0 {
; CI-LABEL: void_func_i8_signext:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i8_signext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i8_signext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_signext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = sext i8 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

define void @void_func_i16(i16 %arg0) #0 {
; CIGFX89-LABEL: void_func_i16:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i16 %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
; CI-LABEL: void_func_i16_zeroext:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i16_zeroext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i16_zeroext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_zeroext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = zext i16 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

define void @void_func_i16_signext(i16 signext %arg0) #0 {
; CI-LABEL: void_func_i16_signext:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_i16_signext:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_i16_signext:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_signext:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %ext = sext i16 %arg0 to i32
  %add = add i32 %ext, 12
  store i32 %add, ptr addrspace(1) undef
  ret void
}

; Scalar i32/i64 and floating-point arguments.
define void @void_func_i32(i32 %arg0) #0 {
; CIGFX89-LABEL: void_func_i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i32 %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_i64(i64 %arg0) #0 {
; CIGFX89-LABEL: void_func_i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i64 %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_f16(half %arg0) #0 {
; CI-LABEL: void_func_f16:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_f16:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store half %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_f32(float %arg0) #0 {
; CIGFX89-LABEL: void_func_f32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store float %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_f64(double %arg0) #0 {
; CIGFX89-LABEL: void_func_f64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store double %arg0, ptr addrspace(1) undef
  ret void
}

; Vectors of i32, from 2 up to 33 elements.
define void @void_func_v2i32(<2 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v3i32(<3 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v3i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx3 v[0:2], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <3 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v4i32(<4 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v4i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <4 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v5i32(<5 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v5i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dword v4, off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_store_b32 v4, off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <5 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v8i32(<8 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v8i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <8 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v16i32(<16 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v16i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <16 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v32i32(<32 x i32> %arg0) #0 {
; CIGFX89-LABEL: void_func_v32i32:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(6)
; CIGFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    scratch_load_b32 v31, off, s32
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <32 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; One element over the register limit.
define void @void_func_v33i32(<33 x i32> %arg0) #0 {
; CI-LABEL: void_func_v33i32:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; CI-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:4
; CI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(6)
; CI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(6)
; CI-NEXT:    buffer_store_dword v20, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: void_func_v33i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; VI-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:4
; VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(6)
; VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(6)
; VI-NEXT:    buffer_store_dword v20, off, s[4:7], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: void_func_v33i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-NEXT:    s_mov_b32 s6, -1
; GFX9-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; GFX9-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:4
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; GFX9-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; GFX9-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; GFX9-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(6)
; GFX9-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(6)
; GFX9-NEXT:    buffer_store_dword v20, off, s[4:7], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v33i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    scratch_load_b32 v31, off, s32
; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:4
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x5
; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    buffer_store_b32 v32, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <33 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

; Vectors of i64, from 2 up to 16 elements.
define void @void_func_v2i64(<2 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v2i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v3i64(<3 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v3i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx2 v[4:5], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_store_b64 v[4:5], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <3 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v4i64(<4 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v4i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <4 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v5i64(<5 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v5i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx2 v[8:9], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b64 v[8:9], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <5 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v8i64(<8 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v8i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <8 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v16i64(<16 x i64> %arg0) #0 {
; CIGFX89-LABEL: void_func_v16i64:
; CIGFX89:       ; %bb.0:
; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CIGFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
; CIGFX89-NEXT:    s_mov_b32 s6, -1
; CIGFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(6)
; CIGFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i64:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    scratch_load_b32 v31, off, s32
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <16 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

; Vectors of sub-dword elements. Unlike its neighbours, the v2i8 case stores
; through a null global pointer rather than an undef one.
define void @void_func_v2i8(<2 x i8> %arg0) #0 {
; CI-LABEL: void_func_v2i8:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
; CI-NEXT:    v_and_b32_e32 v0, 0xff, v0
; CI-NEXT:    s_mov_b32 s4, 0
; CI-NEXT:    v_or_b32_e32 v0, v0, v1
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    s_mov_b32 s5, s4
; CI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v2i8:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
; GFX89-NEXT:    s_mov_b32 s4, 0
; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    s_mov_b32 s5, s4
; GFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i8:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX11-NEXT:    s_mov_b32 s0, 0
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_mov_b32 s1, s0
; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x i8> %arg0, ptr addrspace(1) null
  ret void
}

define void @void_func_v2i16(<2 x i16> %arg0) #0 {
; CI-LABEL: void_func_v2i16:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; CI-NEXT:    v_or_b32_e32 v0, v0, v1
; CI-NEXT:    s_mov_b32 s7, 0xf000
; CI-NEXT:    s_mov_b32 s6, -1
; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; CI-NEXT:    s_waitcnt vmcnt(0)
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: void_func_v2i16:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    s_mov_b32 s7, 0xf000
; GFX89-NEXT:    s_mov_b32 s6, -1
; GFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX89-NEXT:    s_waitcnt vmcnt(0)
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v3i8(<3 x i8> %arg0) #0 {
; CI-LABEL: void_func_v3i8:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt
vmcnt(0) expcnt(0) lgkmcnt(0) 1003; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 1004; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 1005; CI-NEXT: s_mov_b32 s5, 0 1006; CI-NEXT: s_mov_b32 s4, 2 1007; CI-NEXT: s_mov_b32 s7, 0xf000 1008; CI-NEXT: s_mov_b32 s6, -1 1009; CI-NEXT: v_or_b32_e32 v0, v0, v1 1010; CI-NEXT: buffer_store_byte v2, off, s[4:7], 0 1011; CI-NEXT: s_mov_b32 s4, s5 1012; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 1013; CI-NEXT: s_waitcnt vmcnt(0) 1014; CI-NEXT: s_setpc_b64 s[30:31] 1015; 1016; GFX89-LABEL: void_func_v3i8: 1017; GFX89: ; %bb.0: 1018; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1019; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 1020; GFX89-NEXT: s_mov_b32 s5, 0 1021; GFX89-NEXT: s_mov_b32 s4, 2 1022; GFX89-NEXT: s_mov_b32 s7, 0xf000 1023; GFX89-NEXT: s_mov_b32 s6, -1 1024; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1025; GFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0 1026; GFX89-NEXT: s_mov_b32 s4, s5 1027; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0 1028; GFX89-NEXT: s_waitcnt vmcnt(0) 1029; GFX89-NEXT: s_setpc_b64 s[30:31] 1030; 1031; GFX11-LABEL: void_func_v3i8: 1032; GFX11: ; %bb.0: 1033; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1034; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 1035; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 1036; GFX11-NEXT: s_mov_b32 s1, 0 1037; GFX11-NEXT: s_mov_b32 s0, 2 1038; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1039; GFX11-NEXT: s_mov_b32 s2, -1 1040; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 1041; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0 1042; GFX11-NEXT: s_mov_b32 s0, s1 1043; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 1044; GFX11-NEXT: s_setpc_b64 s[30:31] 1045 store <3 x i8> %arg0, ptr addrspace(1) null 1046 ret void 1047} 1048 1049define void @void_func_v4i8(<4 x i8> %arg0) #0 { 1050; CI-LABEL: void_func_v4i8: 1051; CI: ; %bb.0: 1052; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1053; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 1054; CI-NEXT: 
v_lshlrev_b32_e32 v1, 8, v1 1055; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 1056; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1057; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1058; CI-NEXT: v_or_b32_e32 v0, v0, v1 1059; CI-NEXT: v_or_b32_e32 v2, v3, v2 1060; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1061; CI-NEXT: s_mov_b32 s4, 0 1062; CI-NEXT: v_or_b32_e32 v0, v0, v2 1063; CI-NEXT: s_mov_b32 s7, 0xf000 1064; CI-NEXT: s_mov_b32 s6, -1 1065; CI-NEXT: s_mov_b32 s5, s4 1066; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 1067; CI-NEXT: s_waitcnt vmcnt(0) 1068; CI-NEXT: s_setpc_b64 s[30:31] 1069; 1070; GFX89-LABEL: void_func_v4i8: 1071; GFX89: ; %bb.0: 1072; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1073; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 1074; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1075; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3 1076; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1077; GFX89-NEXT: s_mov_b32 s4, 0 1078; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1079; GFX89-NEXT: s_mov_b32 s7, 0xf000 1080; GFX89-NEXT: s_mov_b32 s6, -1 1081; GFX89-NEXT: s_mov_b32 s5, s4 1082; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 1083; GFX89-NEXT: s_waitcnt vmcnt(0) 1084; GFX89-NEXT: s_setpc_b64 s[30:31] 1085; 1086; GFX11-LABEL: void_func_v4i8: 1087; GFX11: ; %bb.0: 1088; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1089; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 1090; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 1091; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 1092; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 1093; GFX11-NEXT: s_mov_b32 s0, 0 1094; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1095; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 1096; GFX11-NEXT: s_mov_b32 s2, -1 1097; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 1098; GFX11-NEXT: s_mov_b32 s1, s0 1099; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_2) 1100; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 1101; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1102; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1103; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 1104; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 1105; GFX11-NEXT: s_setpc_b64 s[30:31] 1106 store <4 x i8> %arg0, ptr addrspace(1) null 1107 ret void 1108} 1109 1110define void @void_func_v5i8(<5 x i8> %arg0) #0 { 1111; CI-LABEL: void_func_v5i8: 1112; CI: ; %bb.0: 1113; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1114; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 1115; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 1116; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 1117; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1118; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1119; CI-NEXT: v_or_b32_e32 v0, v0, v1 1120; CI-NEXT: v_or_b32_e32 v2, v3, v2 1121; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1122; CI-NEXT: s_mov_b32 s5, 0 1123; CI-NEXT: s_mov_b32 s4, 4 1124; CI-NEXT: s_mov_b32 s7, 0xf000 1125; CI-NEXT: s_mov_b32 s6, -1 1126; CI-NEXT: v_or_b32_e32 v0, v0, v2 1127; CI-NEXT: buffer_store_byte v4, off, s[4:7], 0 1128; CI-NEXT: s_mov_b32 s4, s5 1129; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 1130; CI-NEXT: s_waitcnt vmcnt(0) 1131; CI-NEXT: s_setpc_b64 s[30:31] 1132; 1133; GFX89-LABEL: void_func_v5i8: 1134; GFX89: ; %bb.0: 1135; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1136; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 1137; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1138; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3 1139; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1140; GFX89-NEXT: s_mov_b32 s5, 0 1141; GFX89-NEXT: s_mov_b32 s4, 4 1142; GFX89-NEXT: s_mov_b32 s7, 0xf000 1143; GFX89-NEXT: s_mov_b32 s6, -1 1144; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1145; GFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0 1146; 
GFX89-NEXT: s_mov_b32 s4, s5 1147; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 1148; GFX89-NEXT: s_waitcnt vmcnt(0) 1149; GFX89-NEXT: s_setpc_b64 s[30:31] 1150; 1151; GFX11-LABEL: void_func_v5i8: 1152; GFX11: ; %bb.0: 1153; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1154; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 1155; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 1156; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 1157; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 1158; GFX11-NEXT: s_mov_b32 s1, 0 1159; GFX11-NEXT: s_mov_b32 s0, 4 1160; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 1161; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1162; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 1163; GFX11-NEXT: s_mov_b32 s2, -1 1164; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1) 1165; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 1166; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0 1167; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1168; GFX11-NEXT: s_mov_b32 s0, s1 1169; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 1170; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 1171; GFX11-NEXT: s_setpc_b64 s[30:31] 1172 store <5 x i8> %arg0, ptr addrspace(1) null 1173 ret void 1174} 1175 1176define void @void_func_v8i8(<8 x i8> %arg0) #0 { 1177; CI-LABEL: void_func_v8i8: 1178; CI: ; %bb.0: 1179; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1180; CI-NEXT: v_and_b32_e32 v6, 0xff, v6 1181; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 1182; CI-NEXT: v_and_b32_e32 v4, 0xff, v4 1183; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 1184; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 1185; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 1186; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7 1187; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 1188; CI-NEXT: v_or_b32_e32 v4, v4, v5 1189; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1190; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1191; CI-NEXT: v_or_b32_e32 v0, v0, v1 1192; CI-NEXT: v_or_b32_e32 v6, v7, v6 1193; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 1194; CI-NEXT: v_or_b32_e32 v2, v3, v2 1195; CI-NEXT: v_and_b32_e32 v0, 0xffff, 
v0 1196; CI-NEXT: s_mov_b32 s4, 0 1197; CI-NEXT: v_or_b32_e32 v4, v4, v6 1198; CI-NEXT: v_or_b32_e32 v3, v0, v2 1199; CI-NEXT: s_mov_b32 s7, 0xf000 1200; CI-NEXT: s_mov_b32 s6, -1 1201; CI-NEXT: s_mov_b32 s5, s4 1202; CI-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0 1203; CI-NEXT: s_waitcnt vmcnt(0) 1204; CI-NEXT: s_setpc_b64 s[30:31] 1205; 1206; GFX89-LABEL: void_func_v8i8: 1207; GFX89: ; %bb.0: 1208; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1209; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5 1210; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 1211; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1212; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7 1213; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1214; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3 1215; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1216; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1217; GFX89-NEXT: s_mov_b32 s4, 0 1218; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1219; GFX89-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1220; GFX89-NEXT: s_mov_b32 s7, 0xf000 1221; GFX89-NEXT: s_mov_b32 s6, -1 1222; GFX89-NEXT: s_mov_b32 s5, s4 1223; GFX89-NEXT: buffer_store_dwordx2 v[3:4], off, s[4:7], 0 1224; GFX89-NEXT: s_waitcnt vmcnt(0) 1225; GFX89-NEXT: s_setpc_b64 s[30:31] 1226; 1227; GFX11-LABEL: void_func_v8i8: 1228; GFX11: ; %bb.0: 1229; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1230; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5 1231; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 1232; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7 1233; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 1234; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 1235; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 1236; GFX11-NEXT: 
v_lshlrev_b16 v3, 8, v3 1237; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 1238; GFX11-NEXT: v_or_b32_e32 v4, v4, v5 1239; GFX11-NEXT: v_or_b32_e32 v5, v6, v7 1240; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 1241; GFX11-NEXT: s_mov_b32 s0, 0 1242; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 1243; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v4 1244; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v5 1245; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 1246; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1247; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v1 1248; GFX11-NEXT: s_mov_b32 s2, -1 1249; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 1250; GFX11-NEXT: s_mov_b32 s1, s0 1251; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1252; GFX11-NEXT: v_or_b32_e32 v0, v0, v4 1253; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 1254; GFX11-NEXT: s_setpc_b64 s[30:31] 1255 store <8 x i8> %arg0, ptr addrspace(1) null 1256 ret void 1257} 1258 1259define void @void_func_v16i8(<16 x i8> %arg0) #0 { 1260; CI-LABEL: void_func_v16i8: 1261; CI: ; %bb.0: 1262; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1263; CI-NEXT: v_and_b32_e32 v14, 0xff, v14 1264; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13 1265; CI-NEXT: v_and_b32_e32 v12, 0xff, v12 1266; CI-NEXT: v_and_b32_e32 v10, 0xff, v10 1267; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9 1268; CI-NEXT: v_and_b32_e32 v8, 0xff, v8 1269; CI-NEXT: v_and_b32_e32 v6, 0xff, v6 1270; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 1271; CI-NEXT: v_and_b32_e32 v4, 0xff, v4 1272; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 1273; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 1274; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 1275; CI-NEXT: v_lshlrev_b32_e32 v15, 24, v15 1276; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v14 1277; CI-NEXT: v_or_b32_e32 v12, v12, v13 1278; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11 1279; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10 1280; CI-NEXT: v_or_b32_e32 v8, v8, v9 1281; CI-NEXT: v_lshlrev_b32_e32 v7, 24, v7 1282; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 1283; CI-NEXT: v_or_b32_e32 v4, v4, v5 1284; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1285; 
CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1286; CI-NEXT: v_or_b32_e32 v0, v0, v1 1287; CI-NEXT: v_or_b32_e32 v14, v15, v14 1288; CI-NEXT: v_and_b32_e32 v12, 0xffff, v12 1289; CI-NEXT: v_or_b32_e32 v10, v11, v10 1290; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8 1291; CI-NEXT: v_or_b32_e32 v6, v7, v6 1292; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 1293; CI-NEXT: v_or_b32_e32 v2, v3, v2 1294; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1295; CI-NEXT: s_mov_b32 s4, 0 1296; CI-NEXT: v_or_b32_e32 v12, v12, v14 1297; CI-NEXT: v_or_b32_e32 v11, v8, v10 1298; CI-NEXT: v_or_b32_e32 v10, v4, v6 1299; CI-NEXT: v_or_b32_e32 v9, v0, v2 1300; CI-NEXT: s_mov_b32 s7, 0xf000 1301; CI-NEXT: s_mov_b32 s6, -1 1302; CI-NEXT: s_mov_b32 s5, s4 1303; CI-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0 1304; CI-NEXT: s_waitcnt vmcnt(0) 1305; CI-NEXT: s_setpc_b64 s[30:31] 1306; 1307; GFX89-LABEL: void_func_v16i8: 1308; GFX89: ; %bb.0: 1309; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1310; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13 1311; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9 1312; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5 1313; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 1314; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1315; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15 1316; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1317; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11 1318; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1319; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v7 1320; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1321; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v3 1322; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1323; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 
src1_sel:DWORD 1324; GFX89-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1325; GFX89-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1326; GFX89-NEXT: s_mov_b32 s4, 0 1327; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1328; GFX89-NEXT: v_or_b32_sdwa v11, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1329; GFX89-NEXT: v_or_b32_sdwa v10, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1330; GFX89-NEXT: v_or_b32_sdwa v9, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1331; GFX89-NEXT: s_mov_b32 s7, 0xf000 1332; GFX89-NEXT: s_mov_b32 s6, -1 1333; GFX89-NEXT: s_mov_b32 s5, s4 1334; GFX89-NEXT: buffer_store_dwordx4 v[9:12], off, s[4:7], 0 1335; GFX89-NEXT: s_waitcnt vmcnt(0) 1336; GFX89-NEXT: s_setpc_b64 s[30:31] 1337; 1338; GFX11-LABEL: void_func_v16i8: 1339; GFX11: ; %bb.0: 1340; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1341; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13 1342; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 1343; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15 1344; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14 1345; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9 1346; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 1347; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11 1348; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 1349; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5 1350; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 1351; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7 1352; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 1353; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 1354; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 1355; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 1356; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 1357; GFX11-NEXT: v_or_b32_e32 v12, v12, v13 1358; GFX11-NEXT: v_or_b32_e32 v13, v14, v15 1359; GFX11-NEXT: v_or_b32_e32 v8, v8, v9 1360; GFX11-NEXT: v_or_b32_e32 v10, v10, v11 1361; GFX11-NEXT: 
v_or_b32_e32 v4, v4, v5 1362; GFX11-NEXT: v_or_b32_e32 v5, v6, v7 1363; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 1364; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 1365; GFX11-NEXT: v_and_b32_e32 v9, 0xffff, v12 1366; GFX11-NEXT: v_lshlrev_b32_e32 v12, 16, v13 1367; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8 1368; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v10 1369; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4 1370; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v5 1371; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 1372; GFX11-NEXT: v_lshlrev_b32_e32 v6, 16, v1 1373; GFX11-NEXT: v_or_b32_e32 v3, v9, v12 1374; GFX11-NEXT: v_or_b32_e32 v2, v8, v2 1375; GFX11-NEXT: v_or_b32_e32 v1, v4, v5 1376; GFX11-NEXT: s_mov_b32 s0, 0 1377; GFX11-NEXT: v_or_b32_e32 v0, v0, v6 1378; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1379; GFX11-NEXT: s_mov_b32 s2, -1 1380; GFX11-NEXT: s_mov_b32 s1, s0 1381; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 1382; GFX11-NEXT: s_setpc_b64 s[30:31] 1383 store <16 x i8> %arg0, ptr addrspace(1) null 1384 ret void 1385} 1386 1387define void @void_func_v32i8(<32 x i8> %arg0) #0 { 1388; CI-LABEL: void_func_v32i8: 1389; CI: ; %bb.0: 1390; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1391; CI-NEXT: v_lshlrev_b32_e32 v5, 8, v5 1392; CI-NEXT: v_and_b32_e32 v4, 0xff, v4 1393; CI-NEXT: v_or_b32_e32 v4, v4, v5 1394; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 1395; CI-NEXT: v_lshlrev_b32_e32 v9, 8, v9 1396; CI-NEXT: v_and_b32_e32 v8, 0xff, v8 1397; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v13 1398; CI-NEXT: v_and_b32_e32 v12, 0xff, v12 1399; CI-NEXT: v_or_b32_e32 v8, v8, v9 1400; CI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 1401; CI-NEXT: v_and_b32_e32 v0, 0xff, v0 1402; CI-NEXT: v_and_b32_e32 v9, 0xff, v14 1403; CI-NEXT: v_and_b32_e32 v10, 0xff, v10 1404; CI-NEXT: v_and_b32_e32 v6, 0xff, v6 1405; CI-NEXT: v_or_b32_e32 v12, v12, v13 1406; CI-NEXT: v_or_b32_e32 v0, v0, v1 1407; CI-NEXT: v_lshlrev_b32_e32 v1, 24, v15 1408; CI-NEXT: v_lshlrev_b32_e32 v11, 24, v11 1409; CI-NEXT: v_lshlrev_b32_e32 v7, 24, 
v7 1410; CI-NEXT: v_and_b32_e32 v2, 0xff, v2 1411; CI-NEXT: v_lshlrev_b32_e32 v9, 16, v9 1412; CI-NEXT: v_lshlrev_b32_e32 v10, 16, v10 1413; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v6 1414; CI-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1415; CI-NEXT: v_lshlrev_b32_e32 v13, 8, v29 1416; CI-NEXT: v_and_b32_e32 v14, 0xff, v28 1417; CI-NEXT: v_and_b32_e32 v26, 0xff, v26 1418; CI-NEXT: v_lshlrev_b32_e32 v25, 8, v25 1419; CI-NEXT: v_and_b32_e32 v24, 0xff, v24 1420; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1421; CI-NEXT: v_or_b32_e32 v1, v1, v9 1422; CI-NEXT: v_or_b32_e32 v9, v11, v10 1423; CI-NEXT: v_and_b32_e32 v10, 0xffff, v12 1424; CI-NEXT: v_or_b32_e32 v6, v7, v6 1425; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 1426; CI-NEXT: v_lshlrev_b32_e32 v15, 24, v27 1427; CI-NEXT: v_and_b32_e32 v27, 0xff, v30 1428; CI-NEXT: v_or_b32_e32 v13, v14, v13 1429; CI-NEXT: v_lshlrev_b32_e32 v14, 16, v26 1430; CI-NEXT: v_or_b32_e32 v7, v3, v2 1431; CI-NEXT: v_or_b32_e32 v3, v10, v1 1432; CI-NEXT: v_or_b32_e32 v1, v4, v6 1433; CI-NEXT: v_lshlrev_b32_e32 v26, 16, v27 1434; CI-NEXT: v_or_b32_e32 v11, v15, v14 1435; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1436; CI-NEXT: v_and_b32_e32 v12, 0xffff, v13 1437; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8 1438; CI-NEXT: v_or_b32_e32 v0, v0, v7 1439; CI-NEXT: v_or_b32_e32 v2, v8, v9 1440; CI-NEXT: v_and_b32_e32 v8, 0xff, v20 1441; CI-NEXT: v_and_b32_e32 v9, 0xff, v16 1442; CI-NEXT: s_mov_b32 s5, 0 1443; CI-NEXT: s_mov_b32 s4, 16 1444; CI-NEXT: s_mov_b32 s7, 0xf000 1445; CI-NEXT: s_mov_b32 s6, -1 1446; CI-NEXT: s_waitcnt vmcnt(0) 1447; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v5 1448; CI-NEXT: v_or_b32_e32 v5, v24, v25 1449; CI-NEXT: v_and_b32_e32 v5, 0xffff, v5 1450; CI-NEXT: v_or_b32_e32 v4, v4, v26 1451; CI-NEXT: v_or_b32_e32 v6, v5, v11 1452; CI-NEXT: v_and_b32_e32 v5, 0xff, v22 1453; CI-NEXT: v_or_b32_e32 v7, v12, v4 1454; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v23 1455; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 1456; CI-NEXT: v_or_b32_e32 v4, v4, v5 1457; CI-NEXT: v_lshlrev_b32_e32 
v5, 8, v21 1458; CI-NEXT: v_or_b32_e32 v5, v8, v5 1459; CI-NEXT: v_and_b32_e32 v5, 0xffff, v5 1460; CI-NEXT: v_and_b32_e32 v8, 0xff, v18 1461; CI-NEXT: v_or_b32_e32 v5, v5, v4 1462; CI-NEXT: v_lshlrev_b32_e32 v4, 24, v19 1463; CI-NEXT: v_lshlrev_b32_e32 v8, 16, v8 1464; CI-NEXT: v_or_b32_e32 v4, v4, v8 1465; CI-NEXT: v_lshlrev_b32_e32 v8, 8, v17 1466; CI-NEXT: v_or_b32_e32 v8, v9, v8 1467; CI-NEXT: v_and_b32_e32 v8, 0xffff, v8 1468; CI-NEXT: v_or_b32_e32 v4, v8, v4 1469; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1470; CI-NEXT: s_mov_b32 s4, s5 1471; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1472; CI-NEXT: s_waitcnt vmcnt(0) 1473; CI-NEXT: s_setpc_b64 s[30:31] 1474; 1475; GFX89-LABEL: void_func_v32i8: 1476; GFX89: ; %bb.0: 1477; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1478; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v9 1479; GFX89-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1480; GFX89-NEXT: v_lshlrev_b16_e32 v9, 8, v11 1481; GFX89-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1482; GFX89-NEXT: buffer_load_ubyte v10, off, s[0:3], s32 1483; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v13 1484; GFX89-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1485; GFX89-NEXT: v_lshlrev_b16_e32 v13, 8, v15 1486; GFX89-NEXT: v_lshlrev_b16_e32 v5, 8, v5 1487; GFX89-NEXT: v_lshlrev_b16_e32 v7, 8, v7 1488; GFX89-NEXT: v_lshlrev_b16_e32 v3, 8, v3 1489; GFX89-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1490; GFX89-NEXT: v_lshlrev_b16_e32 v1, 8, v1 1491; GFX89-NEXT: v_lshlrev_b16_e32 v11, 8, v29 1492; GFX89-NEXT: v_lshlrev_b16_e32 v14, 8, v25 1493; GFX89-NEXT: v_lshlrev_b16_e32 v15, 8, v27 1494; GFX89-NEXT: v_lshlrev_b16_e32 v21, 8, v21 1495; GFX89-NEXT: v_lshlrev_b16_e32 v23, 8, v23 1496; GFX89-NEXT: v_lshlrev_b16_e32 v17, 8, v17 1497; GFX89-NEXT: 
v_lshlrev_b16_e32 v19, 8, v19 1498; GFX89-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1499; GFX89-NEXT: v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1500; GFX89-NEXT: v_or_b32_sdwa v6, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1501; GFX89-NEXT: v_or_b32_sdwa v2, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1502; GFX89-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1503; GFX89-NEXT: v_or_b32_sdwa v7, v28, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1504; GFX89-NEXT: v_or_b32_sdwa v11, v24, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1505; GFX89-NEXT: v_or_b32_sdwa v14, v26, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1506; GFX89-NEXT: v_or_b32_sdwa v15, v20, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1507; GFX89-NEXT: v_or_b32_sdwa v20, v22, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1508; GFX89-NEXT: v_or_b32_sdwa v16, v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1509; GFX89-NEXT: v_or_b32_sdwa v17, v18, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1510; GFX89-NEXT: s_mov_b32 s5, 0 1511; GFX89-NEXT: s_mov_b32 s4, 16 1512; GFX89-NEXT: s_mov_b32 s7, 0xf000 1513; GFX89-NEXT: s_mov_b32 s6, -1 1514; GFX89-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1515; GFX89-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1516; GFX89-NEXT: v_or_b32_sdwa v6, v11, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1517; GFX89-NEXT: v_or_b32_sdwa v5, v15, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1518; GFX89-NEXT: v_or_b32_sdwa v4, 
v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1519; GFX89-NEXT: v_or_b32_sdwa v3, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1520; GFX89-NEXT: s_waitcnt vmcnt(0) 1521; GFX89-NEXT: v_lshlrev_b16_e32 v8, 8, v10 1522; GFX89-NEXT: v_or_b32_sdwa v8, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1523; GFX89-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1524; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1525; GFX89-NEXT: s_mov_b32 s4, s5 1526; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1527; GFX89-NEXT: s_waitcnt vmcnt(0) 1528; GFX89-NEXT: s_setpc_b64 s[30:31] 1529; 1530; GFX11-LABEL: void_func_v32i8: 1531; GFX11: ; %bb.0: 1532; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1533; GFX11-NEXT: scratch_load_u8 v31, off, s32 1534; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 1535; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 1536; GFX11-NEXT: v_lshlrev_b16 v3, 8, v3 1537; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v2 1538; GFX11-NEXT: v_lshlrev_b16 v9, 8, v9 1539; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 1540; GFX11-NEXT: v_lshlrev_b16 v11, 8, v11 1541; GFX11-NEXT: v_and_b32_e32 v10, 0xff, v10 1542; GFX11-NEXT: v_lshlrev_b16 v17, 8, v17 1543; GFX11-NEXT: v_and_b32_e32 v16, 0xff, v16 1544; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 1545; GFX11-NEXT: v_or_b32_e32 v1, v2, v3 1546; GFX11-NEXT: v_lshlrev_b16 v13, 8, v13 1547; GFX11-NEXT: v_and_b32_e32 v12, 0xff, v12 1548; GFX11-NEXT: v_lshlrev_b16 v15, 8, v15 1549; GFX11-NEXT: v_and_b32_e32 v14, 0xff, v14 1550; GFX11-NEXT: v_lshlrev_b16 v5, 8, v5 1551; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4 1552; GFX11-NEXT: v_lshlrev_b16 v7, 8, v7 1553; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 1554; GFX11-NEXT: v_lshlrev_b16 v29, 8, v29 1555; GFX11-NEXT: v_and_b32_e32 v28, 0xff, v28 1556; GFX11-NEXT: v_and_b32_e32 v30, 0xff, v30 1557; GFX11-NEXT: v_lshlrev_b16 v25, 8, v25 1558; GFX11-NEXT: v_and_b32_e32 
v24, 0xff, v24 1559; GFX11-NEXT: v_lshlrev_b16 v27, 8, v27 1560; GFX11-NEXT: v_and_b32_e32 v26, 0xff, v26 1561; GFX11-NEXT: v_lshlrev_b16 v21, 8, v21 1562; GFX11-NEXT: v_and_b32_e32 v20, 0xff, v20 1563; GFX11-NEXT: v_lshlrev_b16 v23, 8, v23 1564; GFX11-NEXT: v_and_b32_e32 v22, 0xff, v22 1565; GFX11-NEXT: v_lshlrev_b16 v19, 8, v19 1566; GFX11-NEXT: v_and_b32_e32 v18, 0xff, v18 1567; GFX11-NEXT: v_or_b32_e32 v8, v8, v9 1568; GFX11-NEXT: v_or_b32_e32 v9, v10, v11 1569; GFX11-NEXT: v_or_b32_e32 v11, v16, v17 1570; GFX11-NEXT: v_lshlrev_b32_e32 v17, 16, v1 1571; GFX11-NEXT: v_or_b32_e32 v12, v12, v13 1572; GFX11-NEXT: v_or_b32_e32 v13, v14, v15 1573; GFX11-NEXT: v_or_b32_e32 v4, v4, v5 1574; GFX11-NEXT: v_or_b32_e32 v5, v6, v7 1575; GFX11-NEXT: v_or_b32_e32 v2, v28, v29 1576; GFX11-NEXT: v_or_b32_e32 v3, v24, v25 1577; GFX11-NEXT: v_or_b32_e32 v6, v26, v27 1578; GFX11-NEXT: v_or_b32_e32 v7, v20, v21 1579; GFX11-NEXT: v_or_b32_e32 v10, v22, v23 1580; GFX11-NEXT: v_or_b32_e32 v14, v18, v19 1581; GFX11-NEXT: v_and_b32_e32 v15, 0xffff, v4 1582; GFX11-NEXT: v_lshlrev_b32_e32 v16, 16, v5 1583; GFX11-NEXT: v_and_b32_e32 v18, 0xffff, v2 1584; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v3 1585; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v6 1586; GFX11-NEXT: v_and_b32_e32 v7, 0xffff, v7 1587; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v10 1588; GFX11-NEXT: v_and_b32_e32 v11, 0xffff, v11 1589; GFX11-NEXT: v_lshlrev_b32_e32 v14, 16, v14 1590; GFX11-NEXT: v_and_b32_e32 v12, 0xffff, v12 1591; GFX11-NEXT: v_lshlrev_b32_e32 v13, 16, v13 1592; GFX11-NEXT: v_and_b32_e32 v8, 0xffff, v8 1593; GFX11-NEXT: v_lshlrev_b32_e32 v9, 16, v9 1594; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 1595; GFX11-NEXT: v_or_b32_e32 v6, v4, v5 1596; GFX11-NEXT: v_or_b32_e32 v5, v7, v10 1597; GFX11-NEXT: v_or_b32_e32 v4, v11, v14 1598; GFX11-NEXT: v_or_b32_e32 v3, v12, v13 1599; GFX11-NEXT: v_or_b32_e32 v2, v8, v9 1600; GFX11-NEXT: v_or_b32_e32 v0, v0, v17 1601; GFX11-NEXT: s_mov_b32 s1, 0 1602; GFX11-NEXT: s_mov_b32 s0, 16 
1603; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1604; GFX11-NEXT: s_mov_b32 s2, -1 1605; GFX11-NEXT: s_waitcnt vmcnt(0) 1606; GFX11-NEXT: v_lshlrev_b16 v1, 8, v31 1607; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1608; GFX11-NEXT: v_or_b32_e32 v1, v30, v1 1609; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1610; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1611; GFX11-NEXT: v_or_b32_e32 v7, v18, v1 1612; GFX11-NEXT: v_or_b32_e32 v1, v15, v16 1613; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 1614; GFX11-NEXT: s_mov_b32 s0, s1 1615; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 1616; GFX11-NEXT: s_setpc_b64 s[30:31] 1617 store <32 x i8> %arg0, ptr addrspace(1) null 1618 ret void 1619} 1620 1621define void @void_func_v3i16(<3 x i16> %arg0) #0 { 1622; CI-LABEL: void_func_v3i16: 1623; CI: ; %bb.0: 1624; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1625; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1626; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1627; CI-NEXT: s_mov_b32 s7, 0xf000 1628; CI-NEXT: s_mov_b32 s6, -1 1629; CI-NEXT: v_or_b32_e32 v0, v0, v1 1630; CI-NEXT: buffer_store_short v2, off, s[4:7], 0 1631; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 1632; CI-NEXT: s_waitcnt vmcnt(0) 1633; CI-NEXT: s_setpc_b64 s[30:31] 1634; 1635; GFX89-LABEL: void_func_v3i16: 1636; GFX89: ; %bb.0: 1637; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1638; GFX89-NEXT: s_mov_b32 s7, 0xf000 1639; GFX89-NEXT: s_mov_b32 s6, -1 1640; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0 1641; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 1642; GFX89-NEXT: s_waitcnt vmcnt(0) 1643; GFX89-NEXT: s_setpc_b64 s[30:31] 1644; 1645; GFX11-LABEL: void_func_v3i16: 1646; GFX11: ; %bb.0: 1647; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1648; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1649; GFX11-NEXT: s_mov_b32 s2, -1 1650; GFX11-NEXT: s_clause 0x1 1651; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 1652; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 
1653; GFX11-NEXT: s_setpc_b64 s[30:31] 1654 store <3 x i16> %arg0, ptr addrspace(1) undef 1655 ret void 1656} 1657 1658define void @void_func_v4i16(<4 x i16> %arg0) #0 { 1659; CI-LABEL: void_func_v4i16: 1660; CI: ; %bb.0: 1661; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1662; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1663; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1664; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1665; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1666; CI-NEXT: v_or_b32_e32 v2, v2, v3 1667; CI-NEXT: v_or_b32_e32 v1, v0, v1 1668; CI-NEXT: s_mov_b32 s7, 0xf000 1669; CI-NEXT: s_mov_b32 s6, -1 1670; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0 1671; CI-NEXT: s_waitcnt vmcnt(0) 1672; CI-NEXT: s_setpc_b64 s[30:31] 1673; 1674; GFX89-LABEL: void_func_v4i16: 1675; GFX89: ; %bb.0: 1676; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1677; GFX89-NEXT: s_mov_b32 s7, 0xf000 1678; GFX89-NEXT: s_mov_b32 s6, -1 1679; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1680; GFX89-NEXT: s_waitcnt vmcnt(0) 1681; GFX89-NEXT: s_setpc_b64 s[30:31] 1682; 1683; GFX11-LABEL: void_func_v4i16: 1684; GFX11: ; %bb.0: 1685; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1686; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1687; GFX11-NEXT: s_mov_b32 s2, -1 1688; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 1689; GFX11-NEXT: s_setpc_b64 s[30:31] 1690 store <4 x i16> %arg0, ptr addrspace(1) undef 1691 ret void 1692} 1693 1694define void @void_func_v5i16(<5 x i16> %arg0) #0 { 1695; CI-LABEL: void_func_v5i16: 1696; CI: ; %bb.0: 1697; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1698; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1699; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1700; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1701; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1702; CI-NEXT: s_mov_b32 s7, 0xf000 1703; CI-NEXT: s_mov_b32 s6, -1 1704; CI-NEXT: v_or_b32_e32 v2, v2, v3 1705; CI-NEXT: v_or_b32_e32 v1, v0, v1 1706; CI-NEXT: buffer_store_short v4, off, s[4:7], 0 1707; CI-NEXT: 
buffer_store_dwordx2 v[1:2], off, s[4:7], 0 1708; CI-NEXT: s_waitcnt vmcnt(0) 1709; CI-NEXT: s_setpc_b64 s[30:31] 1710; 1711; GFX89-LABEL: void_func_v5i16: 1712; GFX89: ; %bb.0: 1713; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1714; GFX89-NEXT: s_mov_b32 s7, 0xf000 1715; GFX89-NEXT: s_mov_b32 s6, -1 1716; GFX89-NEXT: buffer_store_short v2, off, s[4:7], 0 1717; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1718; GFX89-NEXT: s_waitcnt vmcnt(0) 1719; GFX89-NEXT: s_setpc_b64 s[30:31] 1720; 1721; GFX11-LABEL: void_func_v5i16: 1722; GFX11: ; %bb.0: 1723; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1724; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1725; GFX11-NEXT: s_mov_b32 s2, -1 1726; GFX11-NEXT: s_clause 0x1 1727; GFX11-NEXT: buffer_store_b16 v2, off, s[0:3], 0 1728; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 1729; GFX11-NEXT: s_setpc_b64 s[30:31] 1730 store <5 x i16> %arg0, ptr addrspace(1) undef 1731 ret void 1732} 1733 1734define void @void_func_v8i16(<8 x i16> %arg0) #0 { 1735; CI-LABEL: void_func_v8i16: 1736; CI: ; %bb.0: 1737; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1738; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 1739; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6 1740; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 1741; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 1742; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1743; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1744; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1745; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1746; CI-NEXT: v_or_b32_e32 v6, v6, v7 1747; CI-NEXT: v_or_b32_e32 v5, v4, v5 1748; CI-NEXT: v_or_b32_e32 v4, v2, v3 1749; CI-NEXT: v_or_b32_e32 v3, v0, v1 1750; CI-NEXT: s_mov_b32 s7, 0xf000 1751; CI-NEXT: s_mov_b32 s6, -1 1752; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 1753; CI-NEXT: s_waitcnt vmcnt(0) 1754; CI-NEXT: s_setpc_b64 s[30:31] 1755; 1756; GFX89-LABEL: void_func_v8i16: 1757; GFX89: ; %bb.0: 1758; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1759; GFX89-NEXT: s_mov_b32 s7, 0xf000 1760; 
GFX89-NEXT: s_mov_b32 s6, -1 1761; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1762; GFX89-NEXT: s_waitcnt vmcnt(0) 1763; GFX89-NEXT: s_setpc_b64 s[30:31] 1764; 1765; GFX11-LABEL: void_func_v8i16: 1766; GFX11: ; %bb.0: 1767; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1768; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1769; GFX11-NEXT: s_mov_b32 s2, -1 1770; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 1771; GFX11-NEXT: s_setpc_b64 s[30:31] 1772 store <8 x i16> %arg0, ptr addrspace(1) undef 1773 ret void 1774} 1775 1776define void @void_func_v16i16(<16 x i16> %arg0) #0 { 1777; CI-LABEL: void_func_v16i16: 1778; CI: ; %bb.0: 1779; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1780; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 1781; CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 1782; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1783; CI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1784; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1785; CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1786; CI-NEXT: v_or_b32_e32 v5, v4, v5 1787; CI-NEXT: v_or_b32_e32 v4, v2, v3 1788; CI-NEXT: v_or_b32_e32 v3, v0, v1 1789; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v15 1790; CI-NEXT: v_and_b32_e32 v1, 0xffff, v14 1791; CI-NEXT: v_or_b32_e32 v14, v1, v0 1792; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v13 1793; CI-NEXT: v_and_b32_e32 v1, 0xffff, v12 1794; CI-NEXT: v_or_b32_e32 v13, v1, v0 1795; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v11 1796; CI-NEXT: v_and_b32_e32 v1, 0xffff, v10 1797; CI-NEXT: v_or_b32_e32 v12, v1, v0 1798; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v9 1799; CI-NEXT: v_and_b32_e32 v1, 0xffff, v8 1800; CI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 1801; CI-NEXT: v_and_b32_e32 v6, 0xffff, v6 1802; CI-NEXT: v_or_b32_e32 v11, v1, v0 1803; CI-NEXT: s_mov_b32 s7, 0xf000 1804; CI-NEXT: s_mov_b32 s6, -1 1805; CI-NEXT: v_or_b32_e32 v6, v6, v7 1806; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0 1807; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 1808; CI-NEXT: s_waitcnt vmcnt(0) 1809; CI-NEXT: s_setpc_b64 s[30:31] 1810; 
1811; GFX89-LABEL: void_func_v16i16: 1812; GFX89: ; %bb.0: 1813; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1814; GFX89-NEXT: s_mov_b32 s7, 0xf000 1815; GFX89-NEXT: s_mov_b32 s6, -1 1816; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1817; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1818; GFX89-NEXT: s_waitcnt vmcnt(0) 1819; GFX89-NEXT: s_setpc_b64 s[30:31] 1820; 1821; GFX11-LABEL: void_func_v16i16: 1822; GFX11: ; %bb.0: 1823; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1824; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1825; GFX11-NEXT: s_mov_b32 s2, -1 1826; GFX11-NEXT: s_clause 0x1 1827; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 1828; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 1829; GFX11-NEXT: s_setpc_b64 s[30:31] 1830 store <16 x i16> %arg0, ptr addrspace(1) undef 1831 ret void 1832} 1833 1834define void @void_func_v2i24(<2 x i24> %arg0) #0 { 1835; CI-LABEL: void_func_v2i24: 1836; CI: ; %bb.0: 1837; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1838; CI-NEXT: v_add_i32_e32 v0, vcc, v0, v1 1839; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1840; CI-NEXT: s_mov_b32 s7, 0xf000 1841; CI-NEXT: s_mov_b32 s6, -1 1842; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0 1843; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 1844; CI-NEXT: s_waitcnt vmcnt(0) 1845; CI-NEXT: s_setpc_b64 s[30:31] 1846; 1847; VI-LABEL: void_func_v2i24: 1848; VI: ; %bb.0: 1849; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1850; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 1851; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1852; VI-NEXT: s_mov_b32 s7, 0xf000 1853; VI-NEXT: s_mov_b32 s6, -1 1854; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 1855; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 1856; VI-NEXT: s_waitcnt vmcnt(0) 1857; VI-NEXT: s_setpc_b64 s[30:31] 1858; 1859; GFX9-LABEL: void_func_v2i24: 1860; GFX9: ; %bb.0: 1861; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1862; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 1863; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, 
v0 1864; GFX9-NEXT: s_mov_b32 s7, 0xf000 1865; GFX9-NEXT: s_mov_b32 s6, -1 1866; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0 1867; GFX9-NEXT: buffer_store_short v0, off, s[4:7], 0 1868; GFX9-NEXT: s_waitcnt vmcnt(0) 1869; GFX9-NEXT: s_setpc_b64 s[30:31] 1870; 1871; GFX11-LABEL: void_func_v2i24: 1872; GFX11: ; %bb.0: 1873; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1874; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 1875; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1876; GFX11-NEXT: s_mov_b32 s2, -1 1877; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1878; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1879; GFX11-NEXT: s_clause 0x1 1880; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 1881; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 1882; GFX11-NEXT: s_setpc_b64 s[30:31] 1883 %elt0 = extractelement <2 x i24> %arg0, i32 0 1884 %elt1 = extractelement <2 x i24> %arg0, i32 1 1885 %add = add i24 %elt0, %elt1 1886 store i24 %add, ptr addrspace(1) undef 1887 ret void 1888} 1889 1890define void @void_func_v2f32(<2 x float> %arg0) #0 { 1891; CIGFX89-LABEL: void_func_v2f32: 1892; CIGFX89: ; %bb.0: 1893; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1894; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 1895; CIGFX89-NEXT: s_mov_b32 s6, -1 1896; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1897; CIGFX89-NEXT: s_waitcnt vmcnt(0) 1898; CIGFX89-NEXT: s_setpc_b64 s[30:31] 1899; 1900; GFX11-LABEL: void_func_v2f32: 1901; GFX11: ; %bb.0: 1902; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1903; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1904; GFX11-NEXT: s_mov_b32 s2, -1 1905; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 1906; GFX11-NEXT: s_setpc_b64 s[30:31] 1907 store <2 x float> %arg0, ptr addrspace(1) undef 1908 ret void 1909} 1910 1911define void @void_func_v3f32(<3 x float> %arg0) #0 { 1912; CIGFX89-LABEL: void_func_v3f32: 1913; CIGFX89: ; %bb.0: 1914; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1915; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 1916; CIGFX89-NEXT: 
s_mov_b32 s6, -1 1917; CIGFX89-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 1918; CIGFX89-NEXT: s_waitcnt vmcnt(0) 1919; CIGFX89-NEXT: s_setpc_b64 s[30:31] 1920; 1921; GFX11-LABEL: void_func_v3f32: 1922; GFX11: ; %bb.0: 1923; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1924; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1925; GFX11-NEXT: s_mov_b32 s2, -1 1926; GFX11-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0 1927; GFX11-NEXT: s_setpc_b64 s[30:31] 1928 store <3 x float> %arg0, ptr addrspace(1) undef 1929 ret void 1930} 1931 1932define void @void_func_v4f32(<4 x float> %arg0) #0 { 1933; CIGFX89-LABEL: void_func_v4f32: 1934; CIGFX89: ; %bb.0: 1935; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1936; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 1937; CIGFX89-NEXT: s_mov_b32 s6, -1 1938; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1939; CIGFX89-NEXT: s_waitcnt vmcnt(0) 1940; CIGFX89-NEXT: s_setpc_b64 s[30:31] 1941; 1942; GFX11-LABEL: void_func_v4f32: 1943; GFX11: ; %bb.0: 1944; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1945; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1946; GFX11-NEXT: s_mov_b32 s2, -1 1947; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 1948; GFX11-NEXT: s_setpc_b64 s[30:31] 1949 store <4 x float> %arg0, ptr addrspace(1) undef 1950 ret void 1951} 1952 1953define void @void_func_v8f32(<8 x float> %arg0) #0 { 1954; CIGFX89-LABEL: void_func_v8f32: 1955; CIGFX89: ; %bb.0: 1956; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1957; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 1958; CIGFX89-NEXT: s_mov_b32 s6, -1 1959; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1960; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1961; CIGFX89-NEXT: s_waitcnt vmcnt(0) 1962; CIGFX89-NEXT: s_setpc_b64 s[30:31] 1963; 1964; GFX11-LABEL: void_func_v8f32: 1965; GFX11: ; %bb.0: 1966; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1967; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1968; GFX11-NEXT: s_mov_b32 s2, -1 1969; GFX11-NEXT: 
s_clause 0x1 1970; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 1971; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 1972; GFX11-NEXT: s_setpc_b64 s[30:31] 1973 store <8 x float> %arg0, ptr addrspace(1) undef 1974 ret void 1975} 1976 1977define void @void_func_v16f32(<16 x float> %arg0) #0 { 1978; CIGFX89-LABEL: void_func_v16f32: 1979; CIGFX89: ; %bb.0: 1980; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1981; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 1982; CIGFX89-NEXT: s_mov_b32 s6, -1 1983; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 1984; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 1985; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1986; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1987; CIGFX89-NEXT: s_waitcnt vmcnt(0) 1988; CIGFX89-NEXT: s_setpc_b64 s[30:31] 1989; 1990; GFX11-LABEL: void_func_v16f32: 1991; GFX11: ; %bb.0: 1992; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1993; GFX11-NEXT: s_mov_b32 s3, 0x31016000 1994; GFX11-NEXT: s_mov_b32 s2, -1 1995; GFX11-NEXT: s_clause 0x3 1996; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 1997; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 1998; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 1999; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 2000; GFX11-NEXT: s_setpc_b64 s[30:31] 2001 store <16 x float> %arg0, ptr addrspace(1) undef 2002 ret void 2003} 2004 2005define void @void_func_v2f64(<2 x double> %arg0) #0 { 2006; CIGFX89-LABEL: void_func_v2f64: 2007; CIGFX89: ; %bb.0: 2008; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2009; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2010; CIGFX89-NEXT: s_mov_b32 s6, -1 2011; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2012; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2013; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2014; 2015; GFX11-LABEL: void_func_v2f64: 2016; GFX11: ; %bb.0: 2017; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2018; GFX11-NEXT: s_mov_b32 s3, 
0x31016000 2019; GFX11-NEXT: s_mov_b32 s2, -1 2020; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 2021; GFX11-NEXT: s_setpc_b64 s[30:31] 2022 store <2 x double> %arg0, ptr addrspace(1) undef 2023 ret void 2024} 2025 2026define void @void_func_v3f64(<3 x double> %arg0) #0 { 2027; CIGFX89-LABEL: void_func_v3f64: 2028; CIGFX89: ; %bb.0: 2029; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2030; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2031; CIGFX89-NEXT: s_mov_b32 s6, -1 2032; CIGFX89-NEXT: buffer_store_dwordx2 v[4:5], off, s[4:7], 0 2033; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2034; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2035; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2036; 2037; GFX11-LABEL: void_func_v3f64: 2038; GFX11: ; %bb.0: 2039; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2040; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2041; GFX11-NEXT: s_mov_b32 s2, -1 2042; GFX11-NEXT: s_clause 0x1 2043; GFX11-NEXT: buffer_store_b64 v[4:5], off, s[0:3], 0 2044; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 2045; GFX11-NEXT: s_setpc_b64 s[30:31] 2046 store <3 x double> %arg0, ptr addrspace(1) undef 2047 ret void 2048} 2049 2050define void @void_func_v4f64(<4 x double> %arg0) #0 { 2051; CIGFX89-LABEL: void_func_v4f64: 2052; CIGFX89: ; %bb.0: 2053; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2054; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2055; CIGFX89-NEXT: s_mov_b32 s6, -1 2056; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 2057; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2058; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2059; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2060; 2061; GFX11-LABEL: void_func_v4f64: 2062; GFX11: ; %bb.0: 2063; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2064; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2065; GFX11-NEXT: s_mov_b32 s2, -1 2066; GFX11-NEXT: s_clause 0x1 2067; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 2068; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 2069; GFX11-NEXT: s_setpc_b64 
s[30:31] 2070 store <4 x double> %arg0, ptr addrspace(1) undef 2071 ret void 2072} 2073 2074define void @void_func_v8f64(<8 x double> %arg0) #0 { 2075; CIGFX89-LABEL: void_func_v8f64: 2076; CIGFX89: ; %bb.0: 2077; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2078; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2079; CIGFX89-NEXT: s_mov_b32 s6, -1 2080; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 2081; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 2082; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 2083; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2084; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2085; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2086; 2087; GFX11-LABEL: void_func_v8f64: 2088; GFX11: ; %bb.0: 2089; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2090; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2091; GFX11-NEXT: s_mov_b32 s2, -1 2092; GFX11-NEXT: s_clause 0x3 2093; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 2094; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 2095; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 2096; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 2097; GFX11-NEXT: s_setpc_b64 s[30:31] 2098 store <8 x double> %arg0, ptr addrspace(1) undef 2099 ret void 2100} 2101 2102define void @void_func_v16f64(<16 x double> %arg0) #0 { 2103; CIGFX89-LABEL: void_func_v16f64: 2104; CIGFX89: ; %bb.0: 2105; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2106; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 2107; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2108; CIGFX89-NEXT: s_mov_b32 s6, -1 2109; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 2110; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 2111; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 2112; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 2113; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 2114; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 2115; 
CIGFX89-NEXT: s_waitcnt vmcnt(6) 2116; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 2117; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2118; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2119; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2120; 2121; GFX11-LABEL: void_func_v16f64: 2122; GFX11: ; %bb.0: 2123; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2124; GFX11-NEXT: scratch_load_b32 v31, off, s32 2125; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2126; GFX11-NEXT: s_mov_b32 s2, -1 2127; GFX11-NEXT: s_clause 0x3 2128; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 2129; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 2130; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 2131; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 2132; GFX11-NEXT: s_waitcnt vmcnt(0) 2133; GFX11-NEXT: s_clause 0x3 2134; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 2135; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 2136; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 2137; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 2138; GFX11-NEXT: s_setpc_b64 s[30:31] 2139 store <16 x double> %arg0, ptr addrspace(1) undef 2140 ret void 2141} 2142 2143define void @void_func_v2f16(<2 x half> %arg0) #0 { 2144; CI-LABEL: void_func_v2f16: 2145; CI: ; %bb.0: 2146; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2147; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 2148; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 2149; CI-NEXT: s_mov_b32 s7, 0xf000 2150; CI-NEXT: s_mov_b32 s6, -1 2151; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2152; CI-NEXT: v_or_b32_e32 v0, v0, v1 2153; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 2154; CI-NEXT: s_waitcnt vmcnt(0) 2155; CI-NEXT: s_setpc_b64 s[30:31] 2156; 2157; GFX89-LABEL: void_func_v2f16: 2158; GFX89: ; %bb.0: 2159; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2160; GFX89-NEXT: s_mov_b32 s7, 0xf000 2161; GFX89-NEXT: s_mov_b32 s6, -1 2162; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 2163; GFX89-NEXT: s_waitcnt vmcnt(0) 
2164; GFX89-NEXT: s_setpc_b64 s[30:31] 2165; 2166; GFX11-LABEL: void_func_v2f16: 2167; GFX11: ; %bb.0: 2168; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2169; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2170; GFX11-NEXT: s_mov_b32 s2, -1 2171; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 2172; GFX11-NEXT: s_setpc_b64 s[30:31] 2173 store <2 x half> %arg0, ptr addrspace(1) undef 2174 ret void 2175} 2176 2177; FIXME: The ABI differs depending on whether f16 is legal 2178define void @void_func_v3f16(<3 x half> %arg0) #0 { 2179; CI-LABEL: void_func_v3f16: 2180; CI: ; %bb.0: 2181; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2182; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 2183; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 2184; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 2185; CI-NEXT: s_mov_b32 s7, 0xf000 2186; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2187; CI-NEXT: s_mov_b32 s6, -1 2188; CI-NEXT: v_or_b32_e32 v0, v0, v1 2189; CI-NEXT: buffer_store_short v2, off, s[4:7], 0 2190; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 2191; CI-NEXT: s_waitcnt vmcnt(0) 2192; CI-NEXT: s_setpc_b64 s[30:31] 2193; 2194; GFX89-LABEL: void_func_v3f16: 2195; GFX89: ; %bb.0: 2196; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2197; GFX89-NEXT: s_mov_b32 s7, 0xf000 2198; GFX89-NEXT: s_mov_b32 s6, -1 2199; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0 2200; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 2201; GFX89-NEXT: s_waitcnt vmcnt(0) 2202; GFX89-NEXT: s_setpc_b64 s[30:31] 2203; 2204; GFX11-LABEL: void_func_v3f16: 2205; GFX11: ; %bb.0: 2206; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2207; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2208; GFX11-NEXT: s_mov_b32 s2, -1 2209; GFX11-NEXT: s_clause 0x1 2210; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 2211; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 2212; GFX11-NEXT: s_setpc_b64 s[30:31] 2213 store <3 x half> %arg0, ptr addrspace(1) undef 2214 ret void 2215} 2216 2217define void @void_func_v4f16(<4 x half> %arg0) #0 { 2218; CI-LABEL: void_func_v4f16: 2219; CI: ;
%bb.0: 2220; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2221; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 2222; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 2223; CI-NEXT: v_cvt_f16_f32_e32 v4, v1 2224; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 2225; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v3 2226; CI-NEXT: v_or_b32_e32 v1, v2, v1 2227; CI-NEXT: v_lshlrev_b32_e32 v2, 16, v4 2228; CI-NEXT: v_or_b32_e32 v0, v0, v2 2229; CI-NEXT: s_mov_b32 s7, 0xf000 2230; CI-NEXT: s_mov_b32 s6, -1 2231; CI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 2232; CI-NEXT: s_waitcnt vmcnt(0) 2233; CI-NEXT: s_setpc_b64 s[30:31] 2234; 2235; GFX89-LABEL: void_func_v4f16: 2236; GFX89: ; %bb.0: 2237; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2238; GFX89-NEXT: s_mov_b32 s7, 0xf000 2239; GFX89-NEXT: s_mov_b32 s6, -1 2240; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 2241; GFX89-NEXT: s_waitcnt vmcnt(0) 2242; GFX89-NEXT: s_setpc_b64 s[30:31] 2243; 2244; GFX11-LABEL: void_func_v4f16: 2245; GFX11: ; %bb.0: 2246; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2247; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2248; GFX11-NEXT: s_mov_b32 s2, -1 2249; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 2250; GFX11-NEXT: s_setpc_b64 s[30:31] 2251 store <4 x half> %arg0, ptr addrspace(1) undef 2252 ret void 2253} 2254 2255define void @void_func_v8f16(<8 x half> %arg0) #0 { 2256; CI-LABEL: void_func_v8f16: 2257; CI: ; %bb.0: 2258; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2259; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 2260; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 2261; CI-NEXT: v_cvt_f16_f32_e32 v8, v5 2262; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 2263; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 2264; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 2265; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 2266; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 2267; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7 2268; CI-NEXT: v_or_b32_e32 v5, v6, v5 2269; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v8 2270; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2271; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2272; 
CI-NEXT: v_or_b32_e32 v4, v4, v6 2273; CI-NEXT: v_or_b32_e32 v3, v2, v3 2274; CI-NEXT: v_or_b32_e32 v2, v0, v1 2275; CI-NEXT: s_mov_b32 s7, 0xf000 2276; CI-NEXT: s_mov_b32 s6, -1 2277; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0 2278; CI-NEXT: s_waitcnt vmcnt(0) 2279; CI-NEXT: s_setpc_b64 s[30:31] 2280; 2281; GFX89-LABEL: void_func_v8f16: 2282; GFX89: ; %bb.0: 2283; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2284; GFX89-NEXT: s_mov_b32 s7, 0xf000 2285; GFX89-NEXT: s_mov_b32 s6, -1 2286; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2287; GFX89-NEXT: s_waitcnt vmcnt(0) 2288; GFX89-NEXT: s_setpc_b64 s[30:31] 2289; 2290; GFX11-LABEL: void_func_v8f16: 2291; GFX11: ; %bb.0: 2292; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2293; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2294; GFX11-NEXT: s_mov_b32 s2, -1 2295; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 2296; GFX11-NEXT: s_setpc_b64 s[30:31] 2297 store <8 x half> %arg0, ptr addrspace(1) undef 2298 ret void 2299} 2300 2301define void @void_func_v16f16(<16 x half> %arg0) #0 { 2302; CI-LABEL: void_func_v16f16: 2303; CI: ; %bb.0: 2304; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2305; CI-NEXT: v_cvt_f16_f32_e32 v7, v7 2306; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 2307; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 2308; CI-NEXT: v_cvt_f16_f32_e32 v6, v6 2309; CI-NEXT: v_cvt_f16_f32_e32 v16, v5 2310; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 2311; CI-NEXT: v_cvt_f16_f32_e32 v0, v0 2312; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 2313; CI-NEXT: v_lshlrev_b32_e32 v5, 16, v7 2314; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2315; CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 2316; CI-NEXT: v_or_b32_e32 v5, v6, v5 2317; CI-NEXT: v_lshlrev_b32_e32 v6, 16, v16 2318; CI-NEXT: v_or_b32_e32 v3, v2, v3 2319; CI-NEXT: v_or_b32_e32 v2, v0, v1 2320; CI-NEXT: v_cvt_f16_f32_e32 v0, v15 2321; CI-NEXT: v_or_b32_e32 v4, v4, v6 2322; CI-NEXT: v_cvt_f16_f32_e32 v1, v14 2323; CI-NEXT: v_cvt_f16_f32_e32 v6, v13 2324; CI-NEXT: v_cvt_f16_f32_e32 v7, 
v12 2325; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2326; CI-NEXT: v_or_b32_e32 v13, v1, v0 2327; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6 2328; CI-NEXT: v_or_b32_e32 v12, v7, v0 2329; CI-NEXT: v_cvt_f16_f32_e32 v0, v11 2330; CI-NEXT: v_cvt_f16_f32_e32 v1, v10 2331; CI-NEXT: v_cvt_f16_f32_e32 v6, v9 2332; CI-NEXT: v_cvt_f16_f32_e32 v7, v8 2333; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 2334; CI-NEXT: v_or_b32_e32 v11, v1, v0 2335; CI-NEXT: v_lshlrev_b32_e32 v0, 16, v6 2336; CI-NEXT: v_or_b32_e32 v10, v7, v0 2337; CI-NEXT: s_mov_b32 s7, 0xf000 2338; CI-NEXT: s_mov_b32 s6, -1 2339; CI-NEXT: buffer_store_dwordx4 v[10:13], off, s[4:7], 0 2340; CI-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0 2341; CI-NEXT: s_waitcnt vmcnt(0) 2342; CI-NEXT: s_setpc_b64 s[30:31] 2343; 2344; GFX89-LABEL: void_func_v16f16: 2345; GFX89: ; %bb.0: 2346; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2347; GFX89-NEXT: s_mov_b32 s7, 0xf000 2348; GFX89-NEXT: s_mov_b32 s6, -1 2349; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 2350; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2351; GFX89-NEXT: s_waitcnt vmcnt(0) 2352; GFX89-NEXT: s_setpc_b64 s[30:31] 2353; 2354; GFX11-LABEL: void_func_v16f16: 2355; GFX11: ; %bb.0: 2356; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2357; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2358; GFX11-NEXT: s_mov_b32 s2, -1 2359; GFX11-NEXT: s_clause 0x1 2360; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 2361; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 2362; GFX11-NEXT: s_setpc_b64 s[30:31] 2363 store <16 x half> %arg0, ptr addrspace(1) undef 2364 ret void 2365} 2366 2367; Make sure there is no alignment requirement for passed VGPRs: the i64 argument is expected in v[1:2], immediately after the i32 in v0, with the trailing i32 in v3.
2368define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { 2369; CIGFX89-LABEL: void_func_i32_i64_i32: 2370; CIGFX89: ; %bb.0: 2371; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2372; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2373; CIGFX89-NEXT: s_mov_b32 s6, -1 2374; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 2375; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2376; CIGFX89-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0 2377; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2378; CIGFX89-NEXT: buffer_store_dword v3, off, s[4:7], 0 2379; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2380; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2381; 2382; GFX11-LABEL: void_func_i32_i64_i32: 2383; GFX11: ; %bb.0: 2384; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2385; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2386; GFX11-NEXT: s_mov_b32 s2, -1 2387; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 dlc 2388; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2389; GFX11-NEXT: buffer_store_b64 v[1:2], off, s[0:3], 0 dlc 2390; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2391; GFX11-NEXT: buffer_store_b32 v3, off, s[0:3], 0 dlc 2392; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2393; GFX11-NEXT: s_setpc_b64 s[30:31] 2394 store volatile i32 %arg0, ptr addrspace(1) undef 2395 store volatile i64 %arg1, ptr addrspace(1) undef 2396 store volatile i32 %arg2, ptr addrspace(1) undef 2397 ret void 2398} 2399 2400define void @void_func_struct_i32({ i32 } %arg0) #0 { 2401; CIGFX89-LABEL: void_func_struct_i32: 2402; CIGFX89: ; %bb.0: 2403; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2404; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2405; CIGFX89-NEXT: s_mov_b32 s6, -1 2406; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 2407; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2408; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2409; 2410; GFX11-LABEL: void_func_struct_i32: 2411; GFX11: ; %bb.0: 2412; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2413; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2414; GFX11-NEXT: s_mov_b32 s2, -1 2415; GFX11-NEXT: 
buffer_store_b32 v0, off, s[0:3], 0 2416; GFX11-NEXT: s_setpc_b64 s[30:31] 2417 store { i32 } %arg0, ptr addrspace(1) undef 2418 ret void 2419} 2420 2421define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { 2422; CIGFX89-LABEL: void_func_struct_i8_i32: 2423; CIGFX89: ; %bb.0: 2424; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2425; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2426; CIGFX89-NEXT: s_mov_b32 s6, -1 2427; CIGFX89-NEXT: buffer_store_dword v1, off, s[4:7], 0 2428; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0 2429; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2430; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2431; 2432; GFX11-LABEL: void_func_struct_i8_i32: 2433; GFX11: ; %bb.0: 2434; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2435; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2436; GFX11-NEXT: s_mov_b32 s2, -1 2437; GFX11-NEXT: s_clause 0x1 2438; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0 2439; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 2440; GFX11-NEXT: s_setpc_b64 s[30:31] 2441 store { i8, i32 } %arg0, ptr addrspace(1) undef 2442 ret void 2443} 2444 2445define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 { 2446; CIGFX89-LABEL: void_func_byval_struct_i8_i32: 2447; CIGFX89: ; %bb.0: 2448; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2449; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 2450; CIGFX89-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 2451; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2452; CIGFX89-NEXT: s_mov_b32 s6, -1 2453; CIGFX89-NEXT: s_waitcnt vmcnt(1) 2454; CIGFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 2455; CIGFX89-NEXT: s_waitcnt vmcnt(1) 2456; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0 2457; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2458; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2459; 2460; GFX11-LABEL: void_func_byval_struct_i8_i32: 2461; GFX11: ; %bb.0: 2462; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2463; GFX11-NEXT: s_clause 0x1 2464; GFX11-NEXT: scratch_load_b32 v0, off, 
s32 offset:4 2465; GFX11-NEXT: scratch_load_u8 v1, off, s32 2466; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2467; GFX11-NEXT: s_mov_b32 s2, -1 2468; GFX11-NEXT: s_waitcnt vmcnt(1) 2469; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 2470; GFX11-NEXT: s_waitcnt vmcnt(0) 2471; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 2472; GFX11-NEXT: s_setpc_b64 s[30:31] 2473 %arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0 2474 store { i8, i32 } %arg0.load, ptr addrspace(1) undef 2475 ret void 2476} 2477 2478define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 { 2479; CI-LABEL: void_func_byval_struct_i8_i32_x2: 2480; CI: ; %bb.0: 2481; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2482; CI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc 2483; CI-NEXT: s_waitcnt vmcnt(0) 2484; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc 2485; CI-NEXT: s_waitcnt vmcnt(0) 2486; CI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc 2487; CI-NEXT: s_waitcnt vmcnt(0) 2488; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc 2489; CI-NEXT: s_waitcnt vmcnt(0) 2490; CI-NEXT: s_mov_b32 s7, 0xf000 2491; CI-NEXT: s_mov_b32 s6, -1 2492; CI-NEXT: s_mov_b32 m0, -1 2493; CI-NEXT: buffer_store_dword v2, off, s[4:7], 0 2494; CI-NEXT: s_waitcnt vmcnt(0) 2495; CI-NEXT: buffer_store_byte v1, off, s[4:7], 0 2496; CI-NEXT: s_waitcnt vmcnt(0) 2497; CI-NEXT: buffer_store_dword v4, off, s[4:7], 0 2498; CI-NEXT: s_waitcnt vmcnt(0) 2499; CI-NEXT: buffer_store_byte v3, off, s[4:7], 0 2500; CI-NEXT: s_waitcnt vmcnt(0) 2501; CI-NEXT: ds_write_b32 v0, v0 2502; CI-NEXT: s_waitcnt lgkmcnt(0) 2503; CI-NEXT: s_setpc_b64 s[30:31] 2504; 2505; VI-LABEL: void_func_byval_struct_i8_i32_x2: 2506; VI: ; %bb.0: 2507; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2508; VI-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc 2509; VI-NEXT: s_waitcnt vmcnt(0) 2510; VI-NEXT: buffer_load_dword v2, off, s[0:3], 
s32 offset:4 glc 2511; VI-NEXT: s_waitcnt vmcnt(0) 2512; VI-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc 2513; VI-NEXT: s_waitcnt vmcnt(0) 2514; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc 2515; VI-NEXT: s_waitcnt vmcnt(0) 2516; VI-NEXT: s_mov_b32 s7, 0xf000 2517; VI-NEXT: s_mov_b32 s6, -1 2518; VI-NEXT: s_mov_b32 m0, -1 2519; VI-NEXT: buffer_store_dword v2, off, s[4:7], 0 2520; VI-NEXT: s_waitcnt vmcnt(0) 2521; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 2522; VI-NEXT: s_waitcnt vmcnt(0) 2523; VI-NEXT: buffer_store_dword v4, off, s[4:7], 0 2524; VI-NEXT: s_waitcnt vmcnt(0) 2525; VI-NEXT: buffer_store_byte v3, off, s[4:7], 0 2526; VI-NEXT: s_waitcnt vmcnt(0) 2527; VI-NEXT: ds_write_b32 v0, v0 2528; VI-NEXT: s_waitcnt lgkmcnt(0) 2529; VI-NEXT: s_setpc_b64 s[30:31] 2530; 2531; GFX9-LABEL: void_func_byval_struct_i8_i32_x2: 2532; GFX9: ; %bb.0: 2533; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2534; GFX9-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 glc 2535; GFX9-NEXT: s_waitcnt vmcnt(0) 2536; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:4 glc 2537; GFX9-NEXT: s_waitcnt vmcnt(0) 2538; GFX9-NEXT: buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc 2539; GFX9-NEXT: s_waitcnt vmcnt(0) 2540; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 glc 2541; GFX9-NEXT: s_waitcnt vmcnt(0) 2542; GFX9-NEXT: s_mov_b32 s7, 0xf000 2543; GFX9-NEXT: s_mov_b32 s6, -1 2544; GFX9-NEXT: buffer_store_dword v2, off, s[4:7], 0 2545; GFX9-NEXT: s_waitcnt vmcnt(0) 2546; GFX9-NEXT: buffer_store_byte v1, off, s[4:7], 0 2547; GFX9-NEXT: s_waitcnt vmcnt(0) 2548; GFX9-NEXT: buffer_store_dword v4, off, s[4:7], 0 2549; GFX9-NEXT: s_waitcnt vmcnt(0) 2550; GFX9-NEXT: buffer_store_byte v3, off, s[4:7], 0 2551; GFX9-NEXT: s_waitcnt vmcnt(0) 2552; GFX9-NEXT: ds_write_b32 v0, v0 2553; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2554; GFX9-NEXT: s_setpc_b64 s[30:31] 2555; 2556; GFX11-LABEL: void_func_byval_struct_i8_i32_x2: 2557; GFX11: ; %bb.0: 2558; 
GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2559; GFX11-NEXT: scratch_load_u8 v1, off, s32 glc dlc 2560; GFX11-NEXT: s_waitcnt vmcnt(0) 2561; GFX11-NEXT: scratch_load_b32 v2, off, s32 offset:4 glc dlc 2562; GFX11-NEXT: s_waitcnt vmcnt(0) 2563; GFX11-NEXT: scratch_load_u8 v3, off, s32 offset:8 glc dlc 2564; GFX11-NEXT: s_waitcnt vmcnt(0) 2565; GFX11-NEXT: scratch_load_b32 v4, off, s32 offset:12 glc dlc 2566; GFX11-NEXT: s_waitcnt vmcnt(0) 2567; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2568; GFX11-NEXT: s_mov_b32 s2, -1 2569; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0 dlc 2570; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2571; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc 2572; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2573; GFX11-NEXT: buffer_store_b32 v4, off, s[0:3], 0 dlc 2574; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2575; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc 2576; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2577; GFX11-NEXT: ds_store_b32 v0, v0 2578; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2579; GFX11-NEXT: s_setpc_b64 s[30:31] 2580 %arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0 2581 %arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1 2582 store volatile { i8, i32 } %arg0.load, ptr addrspace(1) undef 2583 store volatile { i8, i32 } %arg1.load, ptr addrspace(1) undef 2584 store volatile i32 %arg2, ptr addrspace(3) undef 2585 ret void 2586} 2587 2588define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 { 2589; CIGFX89-LABEL: void_func_byval_i32_byval_i64: 2590; CIGFX89: ; %bb.0: 2591; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2592; CIGFX89-NEXT: buffer_load_dword v2, off, s[0:3], s32 2593; CIGFX89-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 2594; CIGFX89-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 2595; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2596; CIGFX89-NEXT: s_mov_b32 s6, -1 2597; CIGFX89-NEXT: s_waitcnt vmcnt(2) 2598; CIGFX89-NEXT: 
buffer_store_dword v2, off, s[4:7], 0 2599; CIGFX89-NEXT: s_waitcnt vmcnt(1) 2600; CIGFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 2601; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2602; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2603; 2604; GFX11-LABEL: void_func_byval_i32_byval_i64: 2605; GFX11: ; %bb.0: 2606; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2607; GFX11-NEXT: s_clause 0x1 2608; GFX11-NEXT: scratch_load_b32 v2, off, s32 2609; GFX11-NEXT: scratch_load_b64 v[0:1], off, s32 offset:8 2610; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2611; GFX11-NEXT: s_mov_b32 s2, -1 2612; GFX11-NEXT: s_waitcnt vmcnt(1) 2613; GFX11-NEXT: buffer_store_b32 v2, off, s[0:3], 0 2614; GFX11-NEXT: s_waitcnt vmcnt(0) 2615; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 2616; GFX11-NEXT: s_setpc_b64 s[30:31] 2617 %arg0.load = load i32, ptr addrspace(5) %arg0 2618 %arg1.load = load i64, ptr addrspace(5) %arg1 2619 store i32 %arg0.load, ptr addrspace(1) undef 2620 store i64 %arg1.load, ptr addrspace(1) undef 2621 ret void 2622} 2623 2624define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 { 2625; CIGFX89-LABEL: void_func_v32i32_i32_i64: 2626; CIGFX89: ; %bb.0: 2627; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2628; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 2629; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12 2630; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 2631; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 2632; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2633; CIGFX89-NEXT: s_mov_b32 s6, -1 2634; CIGFX89-NEXT: s_waitcnt vmcnt(3) 2635; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 2636; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2637; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 2638; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2639; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 2640; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2641; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], 
off, s[4:7], 0 2642; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2643; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 2644; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2645; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 2646; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2647; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 2648; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2649; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2650; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2651; CIGFX89-NEXT: buffer_store_dword v34, off, s[4:7], 0 2652; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2653; CIGFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0 2654; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2655; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2656; 2657; GFX11-LABEL: void_func_v32i32_i32_i64: 2658; GFX11: ; %bb.0: 2659; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2660; GFX11-NEXT: s_clause 0x3 2661; GFX11-NEXT: scratch_load_b32 v31, off, s32 2662; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:12 2663; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4 2664; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:8 2665; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2666; GFX11-NEXT: s_mov_b32 s2, -1 2667; GFX11-NEXT: s_waitcnt vmcnt(3) 2668; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc 2669; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2670; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc 2671; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2672; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc 2673; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2674; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc 2675; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2676; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc 2677; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2678; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc 2679; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2680; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc 2681; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2682; GFX11-NEXT: 
buffer_store_b128 v[0:3], off, s[0:3], 0 dlc 2683; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2684; GFX11-NEXT: s_waitcnt vmcnt(1) 2685; GFX11-NEXT: buffer_store_b32 v34, off, s[0:3], 0 dlc 2686; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2687; GFX11-NEXT: s_waitcnt vmcnt(0) 2688; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc 2689; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2690; GFX11-NEXT: s_setpc_b64 s[30:31] 2691 store volatile <32 x i32> %arg0, ptr addrspace(1) undef 2692 store volatile i32 %arg1, ptr addrspace(1) undef 2693 store volatile i64 %arg2, ptr addrspace(1) undef 2694 ret void 2695} 2696 2697; FIXME: Different ext load types on CI vs. VI 2698define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4, bfloat %arg5) #0 { 2699; CI-LABEL: void_func_v32i32_i1_i8_i16_bf16: 2700; CI: ; %bb.0: 2701; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2702; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 2703; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:20 2704; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:16 2705; CI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:4 2706; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:8 2707; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:12 2708; CI-NEXT: s_mov_b32 s7, 0xf000 2709; CI-NEXT: s_mov_b32 s6, -1 2710; CI-NEXT: s_waitcnt vmcnt(5) 2711; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 2712; CI-NEXT: s_waitcnt vmcnt(0) 2713; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 2714; CI-NEXT: s_waitcnt vmcnt(0) 2715; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 2716; CI-NEXT: s_waitcnt vmcnt(0) 2717; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 2718; CI-NEXT: s_waitcnt vmcnt(0) 2719; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 2720; CI-NEXT: s_waitcnt vmcnt(0) 2721; CI-NEXT: v_mul_f32_e32 v12, 1.0, v32 2722; CI-NEXT: v_cvt_f16_f32_e32 v13, v33 2723; CI-NEXT: buffer_store_dwordx4 v[8:11], off, 
s[4:7], 0 2724; CI-NEXT: s_waitcnt vmcnt(0) 2725; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 2726; CI-NEXT: s_waitcnt vmcnt(0) 2727; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2728; CI-NEXT: s_waitcnt vmcnt(0) 2729; CI-NEXT: v_and_b32_e32 v0, 1, v34 2730; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v12 2731; CI-NEXT: buffer_store_byte v0, off, s[4:7], 0 2732; CI-NEXT: s_waitcnt vmcnt(0) 2733; CI-NEXT: buffer_store_byte v35, off, s[4:7], 0 2734; CI-NEXT: s_waitcnt vmcnt(0) 2735; CI-NEXT: buffer_store_short v36, off, s[4:7], 0 2736; CI-NEXT: s_waitcnt vmcnt(0) 2737; CI-NEXT: buffer_store_short v13, off, s[4:7], 0 2738; CI-NEXT: s_waitcnt vmcnt(0) 2739; CI-NEXT: buffer_store_short v1, off, s[4:7], 0 2740; CI-NEXT: s_waitcnt vmcnt(0) 2741; CI-NEXT: s_setpc_b64 s[30:31] 2742; 2743; GFX89-LABEL: void_func_v32i32_i1_i8_i16_bf16: 2744; GFX89: ; %bb.0: 2745; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2746; GFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 2747; GFX89-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:4 2748; GFX89-NEXT: buffer_load_ushort v33, off, s[0:3], s32 offset:8 2749; GFX89-NEXT: buffer_load_ushort v34, off, s[0:3], s32 offset:12 2750; GFX89-NEXT: buffer_load_ushort v35, off, s[0:3], s32 offset:16 2751; GFX89-NEXT: buffer_load_ushort v36, off, s[0:3], s32 offset:20 2752; GFX89-NEXT: s_mov_b32 s7, 0xf000 2753; GFX89-NEXT: s_mov_b32 s6, -1 2754; GFX89-NEXT: s_waitcnt vmcnt(5) 2755; GFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 2756; GFX89-NEXT: s_waitcnt vmcnt(0) 2757; GFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 2758; GFX89-NEXT: s_waitcnt vmcnt(0) 2759; GFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 2760; GFX89-NEXT: s_waitcnt vmcnt(0) 2761; GFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 2762; GFX89-NEXT: s_waitcnt vmcnt(0) 2763; GFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 2764; GFX89-NEXT: s_waitcnt vmcnt(0) 2765; GFX89-NEXT: buffer_store_dwordx4 v[8:11], off, 
s[4:7], 0 2766; GFX89-NEXT: s_waitcnt vmcnt(0) 2767; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 2768; GFX89-NEXT: s_waitcnt vmcnt(0) 2769; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2770; GFX89-NEXT: s_waitcnt vmcnt(0) 2771; GFX89-NEXT: v_and_b32_e32 v0, 1, v32 2772; GFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0 2773; GFX89-NEXT: s_waitcnt vmcnt(0) 2774; GFX89-NEXT: buffer_store_byte v33, off, s[4:7], 0 2775; GFX89-NEXT: s_waitcnt vmcnt(0) 2776; GFX89-NEXT: buffer_store_short v34, off, s[4:7], 0 2777; GFX89-NEXT: s_waitcnt vmcnt(0) 2778; GFX89-NEXT: buffer_store_short v35, off, s[4:7], 0 2779; GFX89-NEXT: s_waitcnt vmcnt(0) 2780; GFX89-NEXT: buffer_store_short v36, off, s[4:7], 0 2781; GFX89-NEXT: s_waitcnt vmcnt(0) 2782; GFX89-NEXT: s_setpc_b64 s[30:31] 2783; 2784; GFX11-LABEL: void_func_v32i32_i1_i8_i16_bf16: 2785; GFX11: ; %bb.0: 2786; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2787; GFX11-NEXT: s_clause 0x5 2788; GFX11-NEXT: scratch_load_b32 v31, off, s32 2789; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:4 2790; GFX11-NEXT: scratch_load_u16 v33, off, s32 offset:8 2791; GFX11-NEXT: scratch_load_u16 v34, off, s32 offset:12 2792; GFX11-NEXT: scratch_load_u16 v35, off, s32 offset:16 2793; GFX11-NEXT: scratch_load_u16 v36, off, s32 offset:20 2794; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2795; GFX11-NEXT: s_mov_b32 s2, -1 2796; GFX11-NEXT: s_waitcnt vmcnt(5) 2797; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc 2798; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2799; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc 2800; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2801; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc 2802; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2803; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc 2804; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2805; GFX11-NEXT: s_waitcnt vmcnt(4) 2806; GFX11-NEXT: v_and_b32_e32 v16, 1, v32 2807; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 
dlc 2808; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2809; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc 2810; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2811; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc 2812; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2813; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc 2814; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2815; GFX11-NEXT: buffer_store_b8 v16, off, s[0:3], 0 dlc 2816; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2817; GFX11-NEXT: s_waitcnt vmcnt(3) 2818; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc 2819; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2820; GFX11-NEXT: s_waitcnt vmcnt(2) 2821; GFX11-NEXT: buffer_store_b16 v34, off, s[0:3], 0 dlc 2822; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2823; GFX11-NEXT: s_waitcnt vmcnt(1) 2824; GFX11-NEXT: buffer_store_b16 v35, off, s[0:3], 0 dlc 2825; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2826; GFX11-NEXT: s_waitcnt vmcnt(0) 2827; GFX11-NEXT: buffer_store_b16 v36, off, s[0:3], 0 dlc 2828; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2829; GFX11-NEXT: s_setpc_b64 s[30:31] 2830 store volatile <32 x i32> %arg0, ptr addrspace(1) undef 2831 store volatile i1 %arg1, ptr addrspace(1) undef 2832 store volatile i8 %arg2, ptr addrspace(1) undef 2833 store volatile i16 %arg3, ptr addrspace(1) undef 2834 store volatile half %arg4, ptr addrspace(1) undef 2835 store volatile bfloat %arg5, ptr addrspace(1) undef 2836 ret void 2837} 2838 2839define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 { 2840; CIGFX89-LABEL: void_func_v32i32_v2i32_v2f32: 2841; CIGFX89: ; %bb.0: 2842; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2843; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 2844; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 2845; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 2846; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 2847; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 
offset:12 2848; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 2849; CIGFX89-NEXT: s_mov_b32 s6, -1 2850; CIGFX89-NEXT: s_waitcnt vmcnt(4) 2851; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 2852; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2853; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 2854; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2855; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 2856; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2857; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 2858; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2859; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 2860; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2861; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 2862; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2863; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 2864; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2865; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2866; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2867; CIGFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0 2868; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2869; CIGFX89-NEXT: buffer_store_dwordx2 v[34:35], off, s[4:7], 0 2870; CIGFX89-NEXT: s_waitcnt vmcnt(0) 2871; CIGFX89-NEXT: s_setpc_b64 s[30:31] 2872; 2873; GFX11-LABEL: void_func_v32i32_v2i32_v2f32: 2874; GFX11: ; %bb.0: 2875; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2876; GFX11-NEXT: s_clause 0x4 2877; GFX11-NEXT: scratch_load_b32 v31, off, s32 2878; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8 2879; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4 2880; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16 2881; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12 2882; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2883; GFX11-NEXT: s_mov_b32 s2, -1 2884; GFX11-NEXT: s_waitcnt vmcnt(4) 2885; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc 2886; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2887; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc 2888; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 
2889; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc 2890; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2891; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc 2892; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2893; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc 2894; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2895; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc 2896; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2897; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc 2898; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2899; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc 2900; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2901; GFX11-NEXT: s_waitcnt vmcnt(2) 2902; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc 2903; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2904; GFX11-NEXT: s_waitcnt vmcnt(0) 2905; GFX11-NEXT: buffer_store_b64 v[34:35], off, s[0:3], 0 dlc 2906; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2907; GFX11-NEXT: s_setpc_b64 s[30:31] 2908 store volatile <32 x i32> %arg0, ptr addrspace(1) undef 2909 store volatile <2 x i32> %arg1, ptr addrspace(1) undef 2910 store volatile <2 x float> %arg2, ptr addrspace(1) undef 2911 ret void 2912} 2913 2914define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2, <2 x bfloat> %arg3, <4 x bfloat> %arg4) #0 { 2915; CI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16: 2916; CI: ; %bb.0: 2917; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2918; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 2919; CI-NEXT: s_mov_b32 s7, 0xf000 2920; CI-NEXT: s_mov_b32 s6, -1 2921; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28 2922; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:32 2923; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36 2924; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:40 2925; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 2926; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 2927; 
CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12 2928; CI-NEXT: s_waitcnt vmcnt(7) 2929; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 2930; CI-NEXT: s_waitcnt vmcnt(0) 2931; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 2932; CI-NEXT: s_waitcnt vmcnt(0) 2933; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 2934; CI-NEXT: s_waitcnt vmcnt(0) 2935; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:16 2936; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 2937; CI-NEXT: s_waitcnt vmcnt(0) 2938; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:8 2939; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:4 2940; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 2941; CI-NEXT: s_waitcnt vmcnt(0) 2942; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 2943; CI-NEXT: s_waitcnt vmcnt(0) 2944; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 2945; CI-NEXT: s_waitcnt vmcnt(0) 2946; CI-NEXT: v_cvt_f16_f32_e32 v10, v38 2947; CI-NEXT: v_mul_f32_e32 v4, 1.0, v32 2948; CI-NEXT: v_mul_f32_e32 v5, 1.0, v33 2949; CI-NEXT: v_mul_f32_e32 v6, 1.0, v34 2950; CI-NEXT: v_mul_f32_e32 v7, 1.0, v35 2951; CI-NEXT: v_mul_f32_e32 v8, 1.0, v36 2952; CI-NEXT: v_mul_f32_e32 v9, 1.0, v37 2953; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 2954; CI-NEXT: s_waitcnt vmcnt(0) 2955; CI-NEXT: buffer_store_short v16, off, s[4:7], 0 2956; CI-NEXT: s_waitcnt vmcnt(0) 2957; CI-NEXT: buffer_store_short v17, off, s[4:7], 0 2958; CI-NEXT: s_waitcnt vmcnt(0) 2959; CI-NEXT: v_cvt_f16_f32_e32 v11, v20 2960; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v4 2961; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v5 2962; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v6 2963; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v7 2964; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v8 2965; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v9 2966; CI-NEXT: buffer_store_short v11, off, s[4:7], 0 2967; CI-NEXT: s_waitcnt vmcnt(0) 2968; CI-NEXT: buffer_store_short v10, off, s[4:7], 0 2969; CI-NEXT: s_waitcnt vmcnt(0) 2970; 
CI-NEXT: buffer_store_short v5, off, s[4:7], 0 2971; CI-NEXT: s_waitcnt vmcnt(0) 2972; CI-NEXT: buffer_store_short v4, off, s[4:7], 0 2973; CI-NEXT: s_waitcnt vmcnt(0) 2974; CI-NEXT: buffer_store_short v3, off, s[4:7], 0 2975; CI-NEXT: s_waitcnt vmcnt(0) 2976; CI-NEXT: buffer_store_short v2, off, s[4:7], 0 2977; CI-NEXT: s_waitcnt vmcnt(0) 2978; CI-NEXT: buffer_store_short v1, off, s[4:7], 0 2979; CI-NEXT: s_waitcnt vmcnt(0) 2980; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 2981; CI-NEXT: s_waitcnt vmcnt(0) 2982; CI-NEXT: s_setpc_b64 s[30:31] 2983; 2984; GFX89-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16: 2985; GFX89: ; %bb.0: 2986; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2987; GFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 2988; GFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:20 2989; GFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16 2990; GFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 2991; GFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:8 2992; GFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:12 2993; GFX89-NEXT: s_mov_b32 s7, 0xf000 2994; GFX89-NEXT: s_mov_b32 s6, -1 2995; GFX89-NEXT: s_waitcnt vmcnt(5) 2996; GFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 2997; GFX89-NEXT: s_waitcnt vmcnt(0) 2998; GFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 2999; GFX89-NEXT: s_waitcnt vmcnt(0) 3000; GFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3001; GFX89-NEXT: s_waitcnt vmcnt(0) 3002; GFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3003; GFX89-NEXT: s_waitcnt vmcnt(0) 3004; GFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3005; GFX89-NEXT: s_waitcnt vmcnt(0) 3006; GFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3007; GFX89-NEXT: s_waitcnt vmcnt(0) 3008; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3009; GFX89-NEXT: s_waitcnt vmcnt(0) 3010; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3011; GFX89-NEXT: 
s_waitcnt vmcnt(0) 3012; GFX89-NEXT: buffer_store_dword v34, off, s[4:7], 0 3013; GFX89-NEXT: s_waitcnt vmcnt(0) 3014; GFX89-NEXT: buffer_store_dword v35, off, s[4:7], 0 3015; GFX89-NEXT: s_waitcnt vmcnt(0) 3016; GFX89-NEXT: buffer_store_dword v36, off, s[4:7], 0 3017; GFX89-NEXT: s_waitcnt vmcnt(0) 3018; GFX89-NEXT: buffer_store_dwordx2 v[32:33], off, s[4:7], 0 3019; GFX89-NEXT: s_waitcnt vmcnt(0) 3020; GFX89-NEXT: s_setpc_b64 s[30:31] 3021; 3022; GFX11-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16: 3023; GFX11: ; %bb.0: 3024; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3025; GFX11-NEXT: s_clause 0x5 3026; GFX11-NEXT: scratch_load_b32 v31, off, s32 3027; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20 3028; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:4 3029; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:8 3030; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:12 3031; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:16 3032; GFX11-NEXT: s_mov_b32 s3, 0x31016000 3033; GFX11-NEXT: s_mov_b32 s2, -1 3034; GFX11-NEXT: s_waitcnt vmcnt(5) 3035; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc 3036; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3037; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc 3038; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3039; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc 3040; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3041; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc 3042; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3043; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc 3044; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3045; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc 3046; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3047; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc 3048; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3049; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc 3050; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3051; GFX11-NEXT: s_waitcnt vmcnt(3) 3052; GFX11-NEXT: 
buffer_store_b32 v34, off, s[0:3], 0 dlc 3053; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3054; GFX11-NEXT: s_waitcnt vmcnt(2) 3055; GFX11-NEXT: buffer_store_b32 v35, off, s[0:3], 0 dlc 3056; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3057; GFX11-NEXT: s_waitcnt vmcnt(1) 3058; GFX11-NEXT: buffer_store_b32 v36, off, s[0:3], 0 dlc 3059; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3060; GFX11-NEXT: s_waitcnt vmcnt(0) 3061; GFX11-NEXT: buffer_store_b64 v[32:33], off, s[0:3], 0 dlc 3062; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3063; GFX11-NEXT: s_setpc_b64 s[30:31] 3064 store volatile <32 x i32> %arg0, ptr addrspace(1) undef 3065 store volatile <2 x i16> %arg1, ptr addrspace(1) undef 3066 store volatile <2 x half> %arg2, ptr addrspace(1) undef 3067 store volatile <2 x bfloat> %arg3, ptr addrspace(1) undef 3068 store volatile <4 x bfloat> %arg4, ptr addrspace(1) undef 3069 ret void 3070} 3071 3072define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { 3073; CIGFX89-LABEL: void_func_v32i32_v2i64_v2f64: 3074; CIGFX89: ; %bb.0: 3075; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3076; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 3077; CIGFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 3078; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 3079; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 3080; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 3081; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 3082; CIGFX89-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 3083; CIGFX89-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28 3084; CIGFX89-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:24 3085; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 3086; CIGFX89-NEXT: s_mov_b32 s6, -1 3087; CIGFX89-NEXT: s_waitcnt vmcnt(8) 3088; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 3089; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3090; CIGFX89-NEXT: 
buffer_store_dwordx4 v[24:27], off, s[4:7], 0 3091; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3092; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3093; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3094; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3095; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3096; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3097; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3098; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3099; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3100; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3101; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3102; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3103; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3104; CIGFX89-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 3105; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3106; CIGFX89-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 3107; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3108; CIGFX89-NEXT: s_setpc_b64 s[30:31] 3109; 3110; GFX11-LABEL: void_func_v32i32_v2i64_v2f64: 3111; GFX11: ; %bb.0: 3112; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3113; GFX11-NEXT: s_clause 0x8 3114; GFX11-NEXT: scratch_load_b32 v31, off, s32 3115; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:32 3116; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:28 3117; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:24 3118; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:16 3119; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:12 3120; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:8 3121; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:4 3122; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:20 3123; GFX11-NEXT: s_mov_b32 s3, 0x31016000 3124; GFX11-NEXT: s_mov_b32 s2, -1 3125; GFX11-NEXT: s_waitcnt vmcnt(8) 3126; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc 3127; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3128; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc 3129; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3130; GFX11-NEXT: 
buffer_store_b128 v[20:23], off, s[0:3], 0 dlc 3131; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3132; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc 3133; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3134; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc 3135; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3136; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc 3137; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3138; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc 3139; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3140; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc 3141; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3142; GFX11-NEXT: s_waitcnt vmcnt(1) 3143; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc 3144; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3145; GFX11-NEXT: s_waitcnt vmcnt(0) 3146; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc 3147; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3148; GFX11-NEXT: s_setpc_b64 s[30:31] 3149 store volatile <32 x i32> %arg0, ptr addrspace(1) undef 3150 store volatile <2 x i64> %arg1, ptr addrspace(1) undef 3151 store volatile <2 x double> %arg2, ptr addrspace(1) undef 3152 ret void 3153} 3154 3155define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 { 3156; CIGFX89-LABEL: void_func_v32i32_v4i32_v4f32: 3157; CIGFX89: ; %bb.0: 3158; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3159; CIGFX89-NEXT: buffer_load_dword v31, off, s[0:3], s32 3160; CIGFX89-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:20 3161; CIGFX89-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:16 3162; CIGFX89-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 3163; CIGFX89-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 3164; CIGFX89-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 3165; CIGFX89-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:32 3166; CIGFX89-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:28 3167; CIGFX89-NEXT: buffer_load_dword 
v37, off, s[0:3], s32 offset:24 3168; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 3169; CIGFX89-NEXT: s_mov_b32 s6, -1 3170; CIGFX89-NEXT: s_waitcnt vmcnt(8) 3171; CIGFX89-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 3172; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3173; CIGFX89-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 3174; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3175; CIGFX89-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3176; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3177; CIGFX89-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3178; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3179; CIGFX89-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3180; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3181; CIGFX89-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3182; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3183; CIGFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3184; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3185; CIGFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3186; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3187; CIGFX89-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 3188; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3189; CIGFX89-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 3190; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3191; CIGFX89-NEXT: s_setpc_b64 s[30:31] 3192; 3193; GFX11-LABEL: void_func_v32i32_v4i32_v4f32: 3194; GFX11: ; %bb.0: 3195; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3196; GFX11-NEXT: s_clause 0x8 3197; GFX11-NEXT: scratch_load_b32 v31, off, s32 3198; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:16 3199; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:12 3200; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8 3201; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4 3202; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:32 3203; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:28 3204; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:24 3205; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:20 3206; GFX11-NEXT: s_mov_b32 s3, 0x31016000 3207; GFX11-NEXT: s_mov_b32 s2, -1 
3208; GFX11-NEXT: s_waitcnt vmcnt(8) 3209; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc 3210; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3211; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc 3212; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3213; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc 3214; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3215; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc 3216; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3217; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc 3218; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3219; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc 3220; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3221; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc 3222; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3223; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc 3224; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3225; GFX11-NEXT: s_waitcnt vmcnt(4) 3226; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc 3227; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3228; GFX11-NEXT: s_waitcnt vmcnt(0) 3229; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc 3230; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3231; GFX11-NEXT: s_setpc_b64 s[30:31] 3232 store volatile <32 x i32> %arg0, ptr addrspace(1) undef 3233 store volatile <4 x i32> %arg1, ptr addrspace(1) undef 3234 store volatile <4 x float> %arg2, ptr addrspace(1) undef 3235 ret void 3236} 3237 3238define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 { 3239; CI-LABEL: void_func_v32i32_v8i32_v8f32: 3240; CI: ; %bb.0: 3241; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3242; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 3243; CI-NEXT: s_mov_b32 s7, 0xf000 3244; CI-NEXT: s_mov_b32 s6, -1 3245; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 3246; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 3247; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 3248; 
CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 3249; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 3250; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12 3251; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8 3252; CI-NEXT: s_waitcnt vmcnt(7) 3253; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 3254; CI-NEXT: s_waitcnt vmcnt(0) 3255; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 3256; CI-NEXT: s_waitcnt vmcnt(0) 3257; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3258; CI-NEXT: s_waitcnt vmcnt(0) 3259; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3260; CI-NEXT: s_waitcnt vmcnt(0) 3261; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3262; CI-NEXT: s_waitcnt vmcnt(0) 3263; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4 3264; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 3265; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 3266; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 3267; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 3268; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 3269; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 3270; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 3271; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 3272; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3273; CI-NEXT: s_waitcnt vmcnt(0) 3274; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3275; CI-NEXT: s_waitcnt vmcnt(0) 3276; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3277; CI-NEXT: s_waitcnt vmcnt(0) 3278; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3279; CI-NEXT: s_waitcnt vmcnt(0) 3280; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 3281; CI-NEXT: s_waitcnt vmcnt(0) 3282; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 3283; CI-NEXT: s_waitcnt vmcnt(0) 3284; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3285; CI-NEXT: s_waitcnt 
vmcnt(0) 3286; CI-NEXT: s_setpc_b64 s[30:31] 3287; 3288; VI-LABEL: void_func_v32i32_v8i32_v8f32: 3289; VI: ; %bb.0: 3290; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3291; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 3292; VI-NEXT: s_mov_b32 s7, 0xf000 3293; VI-NEXT: s_mov_b32 s6, -1 3294; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 3295; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 3296; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 3297; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 3298; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 3299; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12 3300; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8 3301; VI-NEXT: s_waitcnt vmcnt(7) 3302; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 3303; VI-NEXT: s_waitcnt vmcnt(0) 3304; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 3305; VI-NEXT: s_waitcnt vmcnt(0) 3306; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3307; VI-NEXT: s_waitcnt vmcnt(0) 3308; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3309; VI-NEXT: s_waitcnt vmcnt(0) 3310; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3311; VI-NEXT: s_waitcnt vmcnt(0) 3312; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4 3313; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 3314; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 3315; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 3316; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 3317; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 3318; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 3319; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 3320; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 3321; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3322; VI-NEXT: s_waitcnt vmcnt(0) 3323; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3324; 
VI-NEXT: s_waitcnt vmcnt(0) 3325; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3326; VI-NEXT: s_waitcnt vmcnt(0) 3327; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3328; VI-NEXT: s_waitcnt vmcnt(0) 3329; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 3330; VI-NEXT: s_waitcnt vmcnt(0) 3331; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 3332; VI-NEXT: s_waitcnt vmcnt(0) 3333; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3334; VI-NEXT: s_waitcnt vmcnt(0) 3335; VI-NEXT: s_setpc_b64 s[30:31] 3336; 3337; GFX9-LABEL: void_func_v32i32_v8i32_v8f32: 3338; GFX9: ; %bb.0: 3339; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3340; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 3341; GFX9-NEXT: s_mov_b32 s7, 0xf000 3342; GFX9-NEXT: s_mov_b32 s6, -1 3343; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 3344; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 3345; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 3346; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 3347; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:16 3348; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:12 3349; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:8 3350; GFX9-NEXT: s_waitcnt vmcnt(7) 3351; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 3352; GFX9-NEXT: s_waitcnt vmcnt(0) 3353; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 3354; GFX9-NEXT: s_waitcnt vmcnt(0) 3355; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3356; GFX9-NEXT: s_waitcnt vmcnt(0) 3357; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3358; GFX9-NEXT: s_waitcnt vmcnt(0) 3359; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3360; GFX9-NEXT: s_waitcnt vmcnt(0) 3361; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:4 3362; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 3363; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 
3364; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 3365; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 3366; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:48 3367; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:44 3368; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:40 3369; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:36 3370; GFX9-NEXT: s_nop 0 3371; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3372; GFX9-NEXT: s_waitcnt vmcnt(0) 3373; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3374; GFX9-NEXT: s_waitcnt vmcnt(0) 3375; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3376; GFX9-NEXT: s_waitcnt vmcnt(0) 3377; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3378; GFX9-NEXT: s_waitcnt vmcnt(0) 3379; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 3380; GFX9-NEXT: s_waitcnt vmcnt(0) 3381; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 3382; GFX9-NEXT: s_waitcnt vmcnt(0) 3383; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3384; GFX9-NEXT: s_waitcnt vmcnt(0) 3385; GFX9-NEXT: s_setpc_b64 s[30:31] 3386; 3387; GFX11-LABEL: void_func_v32i32_v8i32_v8f32: 3388; GFX11: ; %bb.0: 3389; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3390; GFX11-NEXT: s_clause 0x10 3391; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:48 3392; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:44 3393; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:40 3394; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:64 3395; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:60 3396; GFX11-NEXT: scratch_load_b32 v31, off, s32 3397; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:56 3398; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:16 3399; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:12 3400; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:8 3401; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:32 3402; GFX11-NEXT: scratch_load_b32 v54, 
off, s32 offset:28 3403; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:24 3404; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:20 3405; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:4 3406; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:52 3407; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:36 3408; GFX11-NEXT: s_mov_b32 s3, 0x31016000 3409; GFX11-NEXT: s_mov_b32 s2, -1 3410; GFX11-NEXT: s_waitcnt vmcnt(11) 3411; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc 3412; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3413; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc 3414; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3415; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc 3416; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3417; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc 3418; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3419; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc 3420; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3421; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc 3422; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3423; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc 3424; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3425; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc 3426; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3427; GFX11-NEXT: s_waitcnt vmcnt(3) 3428; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 0 dlc 3429; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3430; GFX11-NEXT: s_waitcnt vmcnt(2) 3431; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc 3432; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3433; GFX11-NEXT: s_waitcnt vmcnt(1) 3434; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc 3435; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3436; GFX11-NEXT: s_waitcnt vmcnt(0) 3437; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc 3438; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3439; GFX11-NEXT: s_setpc_b64 s[30:31] 3440 store volatile <32 x i32> %arg0, ptr addrspace(1) undef 3441 store volatile 
<8 x i32> %arg1, ptr addrspace(1) undef 3442 store volatile <8 x float> %arg2, ptr addrspace(1) undef 3443 ret void 3444} 3445 3446define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 { 3447; CI-LABEL: void_func_v32i32_v16i32_v16f32: 3448; CI: ; %bb.0: 3449; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3450; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 3451; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 3452; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 3453; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 3454; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 3455; CI-NEXT: s_mov_b32 s7, 0xf000 3456; CI-NEXT: s_mov_b32 s6, -1 3457; CI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48 3458; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44 3459; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40 3460; CI-NEXT: s_waitcnt vmcnt(7) 3461; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 3462; CI-NEXT: s_waitcnt vmcnt(0) 3463; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 3464; CI-NEXT: s_waitcnt vmcnt(0) 3465; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3466; CI-NEXT: s_waitcnt vmcnt(0) 3467; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3468; CI-NEXT: s_waitcnt vmcnt(0) 3469; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3470; CI-NEXT: s_waitcnt vmcnt(0) 3471; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36 3472; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 3473; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 3474; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 3475; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 3476; CI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 3477; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 3478; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8 3479; CI-NEXT: buffer_store_dwordx4 
v[8:11], off, s[4:7], 0 3480; CI-NEXT: s_waitcnt vmcnt(0) 3481; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3482; CI-NEXT: s_waitcnt vmcnt(0) 3483; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 3484; CI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96 3485; CI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92 3486; CI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 3487; CI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 3488; CI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 3489; CI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 3490; CI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104 3491; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3492; CI-NEXT: s_waitcnt vmcnt(0) 3493; CI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 3494; CI-NEXT: s_waitcnt vmcnt(0) 3495; CI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100 3496; CI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 3497; CI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 3498; CI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 3499; CI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 3500; CI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80 3501; CI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76 3502; CI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 3503; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68 3504; CI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 3505; CI-NEXT: s_waitcnt vmcnt(0) 3506; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3507; CI-NEXT: s_waitcnt vmcnt(0) 3508; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3509; CI-NEXT: s_waitcnt vmcnt(0) 3510; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3511; CI-NEXT: s_waitcnt vmcnt(0) 3512; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3513; CI-NEXT: s_waitcnt vmcnt(0) 3514; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3515; CI-NEXT: s_waitcnt 
vmcnt(0) 3516; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3517; CI-NEXT: s_waitcnt vmcnt(0) 3518; CI-NEXT: s_setpc_b64 s[30:31] 3519; 3520; VI-LABEL: void_func_v32i32_v16i32_v16f32: 3521; VI: ; %bb.0: 3522; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3523; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 3524; VI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 3525; VI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 3526; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 3527; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 3528; VI-NEXT: s_mov_b32 s7, 0xf000 3529; VI-NEXT: s_mov_b32 s6, -1 3530; VI-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48 3531; VI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44 3532; VI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40 3533; VI-NEXT: s_waitcnt vmcnt(7) 3534; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 3535; VI-NEXT: s_waitcnt vmcnt(0) 3536; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 3537; VI-NEXT: s_waitcnt vmcnt(0) 3538; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3539; VI-NEXT: s_waitcnt vmcnt(0) 3540; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3541; VI-NEXT: s_waitcnt vmcnt(0) 3542; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3543; VI-NEXT: s_waitcnt vmcnt(0) 3544; VI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36 3545; VI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 3546; VI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 3547; VI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 3548; VI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 3549; VI-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 3550; VI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 3551; VI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8 3552; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3553; VI-NEXT: s_waitcnt vmcnt(0) 3554; VI-NEXT: 
buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3555; VI-NEXT: s_waitcnt vmcnt(0) 3556; VI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 3557; VI-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96 3558; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92 3559; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 3560; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 3561; VI-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 3562; VI-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 3563; VI-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104 3564; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3565; VI-NEXT: s_waitcnt vmcnt(0) 3566; VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 3567; VI-NEXT: s_waitcnt vmcnt(0) 3568; VI-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100 3569; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 3570; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 3571; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 3572; VI-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 3573; VI-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80 3574; VI-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76 3575; VI-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 3576; VI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68 3577; VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 3578; VI-NEXT: s_waitcnt vmcnt(0) 3579; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3580; VI-NEXT: s_waitcnt vmcnt(0) 3581; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3582; VI-NEXT: s_waitcnt vmcnt(0) 3583; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3584; VI-NEXT: s_waitcnt vmcnt(0) 3585; VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3586; VI-NEXT: s_waitcnt vmcnt(0) 3587; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3588; VI-NEXT: s_waitcnt vmcnt(0) 3589; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3590; 
VI-NEXT: s_waitcnt vmcnt(0) 3591; VI-NEXT: s_setpc_b64 s[30:31] 3592; 3593; GFX9-LABEL: void_func_v32i32_v16i32_v16f32: 3594; GFX9: ; %bb.0: 3595; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3596; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 3597; GFX9-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:64 3598; GFX9-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:60 3599; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:56 3600; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:52 3601; GFX9-NEXT: s_mov_b32 s7, 0xf000 3602; GFX9-NEXT: s_mov_b32 s6, -1 3603; GFX9-NEXT: buffer_load_dword v39, off, s[0:3], s32 offset:48 3604; GFX9-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:44 3605; GFX9-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:40 3606; GFX9-NEXT: s_waitcnt vmcnt(7) 3607; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 3608; GFX9-NEXT: s_waitcnt vmcnt(0) 3609; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 3610; GFX9-NEXT: s_waitcnt vmcnt(0) 3611; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3612; GFX9-NEXT: s_waitcnt vmcnt(0) 3613; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3614; GFX9-NEXT: s_waitcnt vmcnt(0) 3615; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3616; GFX9-NEXT: s_waitcnt vmcnt(0) 3617; GFX9-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:36 3618; GFX9-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:32 3619; GFX9-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:28 3620; GFX9-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:24 3621; GFX9-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:20 3622; GFX9-NEXT: buffer_load_dword v19, off, s[0:3], s32 offset:16 3623; GFX9-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:12 3624; GFX9-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:8 3625; GFX9-NEXT: s_nop 0 3626; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3627; GFX9-NEXT: s_waitcnt vmcnt(0) 3628; 
GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3629; GFX9-NEXT: s_waitcnt vmcnt(0) 3630; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:4 3631; GFX9-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:96 3632; GFX9-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:92 3633; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:88 3634; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:84 3635; GFX9-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:112 3636; GFX9-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:108 3637; GFX9-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:104 3638; GFX9-NEXT: s_nop 0 3639; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3640; GFX9-NEXT: s_waitcnt vmcnt(0) 3641; GFX9-NEXT: buffer_store_dwordx4 v[32:35], off, s[4:7], 0 3642; GFX9-NEXT: s_waitcnt vmcnt(0) 3643; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:100 3644; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:128 3645; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:124 3646; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:120 3647; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:116 3648; GFX9-NEXT: buffer_load_dword v23, off, s[0:3], s32 offset:80 3649; GFX9-NEXT: buffer_load_dword v22, off, s[0:3], s32 offset:76 3650; GFX9-NEXT: buffer_load_dword v21, off, s[0:3], s32 offset:72 3651; GFX9-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:68 3652; GFX9-NEXT: s_nop 0 3653; GFX9-NEXT: buffer_store_dwordx4 v[36:39], off, s[4:7], 0 3654; GFX9-NEXT: s_waitcnt vmcnt(0) 3655; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3656; GFX9-NEXT: s_waitcnt vmcnt(0) 3657; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3658; GFX9-NEXT: s_waitcnt vmcnt(0) 3659; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3660; GFX9-NEXT: s_waitcnt vmcnt(0) 3661; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3662; GFX9-NEXT: s_waitcnt vmcnt(0) 3663; GFX9-NEXT: 
buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3664; GFX9-NEXT: s_waitcnt vmcnt(0) 3665; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3666; GFX9-NEXT: s_waitcnt vmcnt(0) 3667; GFX9-NEXT: s_setpc_b64 s[30:31] 3668; 3669; GFX11-LABEL: void_func_v32i32_v16i32_v16f32: 3670; GFX11: ; %bb.0: 3671; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3672; GFX11-NEXT: s_clause 0x1f 3673; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:80 3674; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:76 3675; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:72 3676; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:96 3677; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:92 3678; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:88 3679; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:112 3680; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:108 3681; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:104 3682; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:128 3683; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:124 3684; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:120 3685; GFX11-NEXT: scratch_load_b32 v67, off, s32 offset:16 3686; GFX11-NEXT: scratch_load_b32 v66, off, s32 offset:12 3687; GFX11-NEXT: scratch_load_b32 v65, off, s32 offset:8 3688; GFX11-NEXT: scratch_load_b32 v71, off, s32 offset:32 3689; GFX11-NEXT: scratch_load_b32 v70, off, s32 offset:28 3690; GFX11-NEXT: scratch_load_b32 v31, off, s32 3691; GFX11-NEXT: scratch_load_b32 v69, off, s32 offset:24 3692; GFX11-NEXT: scratch_load_b32 v83, off, s32 offset:48 3693; GFX11-NEXT: scratch_load_b32 v82, off, s32 offset:44 3694; GFX11-NEXT: scratch_load_b32 v81, off, s32 offset:40 3695; GFX11-NEXT: scratch_load_b32 v87, off, s32 offset:64 3696; GFX11-NEXT: scratch_load_b32 v86, off, s32 offset:60 3697; GFX11-NEXT: scratch_load_b32 v85, off, s32 offset:56 3698; GFX11-NEXT: scratch_load_b32 v84, off, s32 offset:52 3699; GFX11-NEXT: scratch_load_b32 v80, off, s32 offset:36 3700; GFX11-NEXT: 
scratch_load_b32 v68, off, s32 offset:20 3701; GFX11-NEXT: scratch_load_b32 v64, off, s32 offset:4 3702; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:116 3703; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:100 3704; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:84 3705; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:68 3706; GFX11-NEXT: s_mov_b32 s3, 0x31016000 3707; GFX11-NEXT: s_mov_b32 s2, -1 3708; GFX11-NEXT: s_waitcnt vmcnt(15) 3709; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc 3710; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3711; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc 3712; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3713; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc 3714; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3715; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc 3716; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3717; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc 3718; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3719; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc 3720; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3721; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc 3722; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3723; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc 3724; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3725; GFX11-NEXT: s_waitcnt vmcnt(7) 3726; GFX11-NEXT: buffer_store_b128 v[84:87], off, s[0:3], 0 dlc 3727; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3728; GFX11-NEXT: s_waitcnt vmcnt(6) 3729; GFX11-NEXT: buffer_store_b128 v[80:83], off, s[0:3], 0 dlc 3730; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3731; GFX11-NEXT: s_waitcnt vmcnt(5) 3732; GFX11-NEXT: buffer_store_b128 v[68:71], off, s[0:3], 0 dlc 3733; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3734; GFX11-NEXT: s_waitcnt vmcnt(4) 3735; GFX11-NEXT: buffer_store_b128 v[64:67], off, s[0:3], 0 dlc 3736; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3737; GFX11-NEXT: s_waitcnt vmcnt(3) 3738; GFX11-NEXT: buffer_store_b128 v[52:55], off, s[0:3], 
0 dlc 3739; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3740; GFX11-NEXT: s_waitcnt vmcnt(2) 3741; GFX11-NEXT: buffer_store_b128 v[48:51], off, s[0:3], 0 dlc 3742; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3743; GFX11-NEXT: s_waitcnt vmcnt(1) 3744; GFX11-NEXT: buffer_store_b128 v[36:39], off, s[0:3], 0 dlc 3745; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3746; GFX11-NEXT: s_waitcnt vmcnt(0) 3747; GFX11-NEXT: buffer_store_b128 v[32:35], off, s[0:3], 0 dlc 3748; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3749; GFX11-NEXT: s_setpc_b64 s[30:31] 3750 store volatile <32 x i32> %arg0, ptr addrspace(1) undef 3751 store volatile <16 x i32> %arg1, ptr addrspace(1) undef 3752 store volatile <16 x float> %arg2, ptr addrspace(1) undef 3753 ret void 3754} 3755 3756; Make sure register v3 isn't wasted because 3-element vector types are promoted to 4-element vectors; %arg1 should be passed in v3. 3757define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { 3758; CI-LABEL: void_func_v3f32_wasted_reg: 3759; CI: ; %bb.0: 3760; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3761; CI-NEXT: s_mov_b32 m0, -1 3762; CI-NEXT: ds_write_b32 v0, v0 3763; CI-NEXT: ds_write_b32 v0, v1 3764; CI-NEXT: ds_write_b32 v0, v2 3765; CI-NEXT: ds_write_b32 v0, v3 3766; CI-NEXT: s_waitcnt lgkmcnt(0) 3767; CI-NEXT: s_setpc_b64 s[30:31] 3768; 3769; VI-LABEL: void_func_v3f32_wasted_reg: 3770; VI: ; %bb.0: 3771; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3772; VI-NEXT: s_mov_b32 m0, -1 3773; VI-NEXT: ds_write_b32 v0, v0 3774; VI-NEXT: ds_write_b32 v0, v1 3775; VI-NEXT: ds_write_b32 v0, v2 3776; VI-NEXT: ds_write_b32 v0, v3 3777; VI-NEXT: s_waitcnt lgkmcnt(0) 3778; VI-NEXT: s_setpc_b64 s[30:31] 3779; 3780; GFX9-LABEL: void_func_v3f32_wasted_reg: 3781; GFX9: ; %bb.0: 3782; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3783; GFX9-NEXT: ds_write_b32 v0, v0 3784; GFX9-NEXT: ds_write_b32 v0, v1 3785; GFX9-NEXT: ds_write_b32 v0, v2 3786; GFX9-NEXT: ds_write_b32 v0, v3 3787; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3788; GFX9-NEXT: s_setpc_b64 s[30:31] 3789; 3790;
GFX11-LABEL: void_func_v3f32_wasted_reg: 3791; GFX11: ; %bb.0: 3792; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3793; GFX11-NEXT: ds_store_b32 v0, v0 3794; GFX11-NEXT: ds_store_b32 v0, v1 3795; GFX11-NEXT: ds_store_b32 v0, v2 3796; GFX11-NEXT: ds_store_b32 v0, v3 3797; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3798; GFX11-NEXT: s_setpc_b64 s[30:31] 3799 %arg0.0 = extractelement <3 x float> %arg0, i32 0 3800 %arg0.1 = extractelement <3 x float> %arg0, i32 1 3801 %arg0.2 = extractelement <3 x float> %arg0, i32 2 3802 store volatile float %arg0.0, ptr addrspace(3) undef 3803 store volatile float %arg0.1, ptr addrspace(3) undef 3804 store volatile float %arg0.2, ptr addrspace(3) undef 3805 store volatile i32 %arg1, ptr addrspace(3) undef 3806 ret void 3807} 3808 3809define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { 3810; CI-LABEL: void_func_v3i32_wasted_reg: 3811; CI: ; %bb.0: 3812; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3813; CI-NEXT: s_mov_b32 m0, -1 3814; CI-NEXT: ds_write_b32 v0, v0 3815; CI-NEXT: ds_write_b32 v0, v1 3816; CI-NEXT: ds_write_b32 v0, v2 3817; CI-NEXT: ds_write_b32 v0, v3 3818; CI-NEXT: s_waitcnt lgkmcnt(0) 3819; CI-NEXT: s_setpc_b64 s[30:31] 3820; 3821; VI-LABEL: void_func_v3i32_wasted_reg: 3822; VI: ; %bb.0: 3823; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3824; VI-NEXT: s_mov_b32 m0, -1 3825; VI-NEXT: ds_write_b32 v0, v0 3826; VI-NEXT: ds_write_b32 v0, v1 3827; VI-NEXT: ds_write_b32 v0, v2 3828; VI-NEXT: ds_write_b32 v0, v3 3829; VI-NEXT: s_waitcnt lgkmcnt(0) 3830; VI-NEXT: s_setpc_b64 s[30:31] 3831; 3832; GFX9-LABEL: void_func_v3i32_wasted_reg: 3833; GFX9: ; %bb.0: 3834; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3835; GFX9-NEXT: ds_write_b32 v0, v0 3836; GFX9-NEXT: ds_write_b32 v0, v1 3837; GFX9-NEXT: ds_write_b32 v0, v2 3838; GFX9-NEXT: ds_write_b32 v0, v3 3839; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3840; GFX9-NEXT: s_setpc_b64 s[30:31] 3841; 3842; GFX11-LABEL: void_func_v3i32_wasted_reg: 3843; GFX11: ; 
%bb.0: 3844; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3845; GFX11-NEXT: ds_store_b32 v0, v0 3846; GFX11-NEXT: ds_store_b32 v0, v1 3847; GFX11-NEXT: ds_store_b32 v0, v2 3848; GFX11-NEXT: ds_store_b32 v0, v3 3849; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3850; GFX11-NEXT: s_setpc_b64 s[30:31] 3851 %arg0.0 = extractelement <3 x i32> %arg0, i32 0 3852 %arg0.1 = extractelement <3 x i32> %arg0, i32 1 3853 %arg0.2 = extractelement <3 x i32> %arg0, i32 2 3854 store volatile i32 %arg0.0, ptr addrspace(3) undef 3855 store volatile i32 %arg0.1, ptr addrspace(3) undef 3856 store volatile i32 %arg0.2, ptr addrspace(3) undef 3857 store volatile i32 %arg1, ptr addrspace(3) undef 3858 ret void 3859} 3860 3861; Check there is no crash on a volatile store of a <16 x i8> argument. 3862define void @void_func_volatile_v16i8(<16 x i8> %arg0) #0 { 3863; CIGFX89-LABEL: void_func_volatile_v16i8: 3864; CIGFX89: ; %bb.0: 3865; CIGFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3866; CIGFX89-NEXT: s_mov_b32 s7, 0xf000 3867; CIGFX89-NEXT: s_mov_b32 s6, -1 3868; CIGFX89-NEXT: buffer_store_byte v15, off, s[4:7], 0 3869; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3870; CIGFX89-NEXT: buffer_store_byte v14, off, s[4:7], 0 3871; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3872; CIGFX89-NEXT: buffer_store_byte v13, off, s[4:7], 0 3873; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3874; CIGFX89-NEXT: buffer_store_byte v12, off, s[4:7], 0 3875; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3876; CIGFX89-NEXT: buffer_store_byte v11, off, s[4:7], 0 3877; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3878; CIGFX89-NEXT: buffer_store_byte v10, off, s[4:7], 0 3879; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3880; CIGFX89-NEXT: buffer_store_byte v9, off, s[4:7], 0 3881; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3882; CIGFX89-NEXT: buffer_store_byte v8, off, s[4:7], 0 3883; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3884; CIGFX89-NEXT: buffer_store_byte v7, off, s[4:7], 0 3885; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3886; CIGFX89-NEXT: buffer_store_byte v6, off, s[4:7], 0 3887; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3888; CIGFX89-NEXT:
buffer_store_byte v5, off, s[4:7], 0 3889; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3890; CIGFX89-NEXT: buffer_store_byte v4, off, s[4:7], 0 3891; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3892; CIGFX89-NEXT: buffer_store_byte v3, off, s[4:7], 0 3893; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3894; CIGFX89-NEXT: buffer_store_byte v2, off, s[4:7], 0 3895; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3896; CIGFX89-NEXT: buffer_store_byte v1, off, s[4:7], 0 3897; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3898; CIGFX89-NEXT: buffer_store_byte v0, off, s[4:7], 0 3899; CIGFX89-NEXT: s_waitcnt vmcnt(0) 3900; CIGFX89-NEXT: s_setpc_b64 s[30:31] 3901; 3902; GFX11-LABEL: void_func_volatile_v16i8: 3903; GFX11: ; %bb.0: 3904; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3905; GFX11-NEXT: s_mov_b32 s3, 0x31016000 3906; GFX11-NEXT: s_mov_b32 s2, -1 3907; GFX11-NEXT: buffer_store_b8 v15, off, s[0:3], 0 dlc 3908; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3909; GFX11-NEXT: buffer_store_b8 v14, off, s[0:3], 0 dlc 3910; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3911; GFX11-NEXT: buffer_store_b8 v13, off, s[0:3], 0 dlc 3912; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3913; GFX11-NEXT: buffer_store_b8 v12, off, s[0:3], 0 dlc 3914; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3915; GFX11-NEXT: buffer_store_b8 v11, off, s[0:3], 0 dlc 3916; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3917; GFX11-NEXT: buffer_store_b8 v10, off, s[0:3], 0 dlc 3918; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3919; GFX11-NEXT: buffer_store_b8 v9, off, s[0:3], 0 dlc 3920; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3921; GFX11-NEXT: buffer_store_b8 v8, off, s[0:3], 0 dlc 3922; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3923; GFX11-NEXT: buffer_store_b8 v7, off, s[0:3], 0 dlc 3924; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3925; GFX11-NEXT: buffer_store_b8 v6, off, s[0:3], 0 dlc 3926; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3927; GFX11-NEXT: buffer_store_b8 v5, off, s[0:3], 0 dlc 3928; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3929; GFX11-NEXT: buffer_store_b8 v4, off, s[0:3], 0 dlc 3930; GFX11-NEXT: 
s_waitcnt_vscnt null, 0x0 3931; GFX11-NEXT: buffer_store_b8 v3, off, s[0:3], 0 dlc 3932; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3933; GFX11-NEXT: buffer_store_b8 v2, off, s[0:3], 0 dlc 3934; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3935; GFX11-NEXT: buffer_store_b8 v1, off, s[0:3], 0 dlc 3936; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3937; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc 3938; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3939; GFX11-NEXT: s_setpc_b64 s[30:31] 3940 store volatile <16 x i8> %arg0, ptr addrspace(1) undef 3941 ret void 3942} 3943 3944; Check there is no crash. 3945define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { 3946; CI-LABEL: void_func_v32i32_v16i8: 3947; CI: ; %bb.0: 3948; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3949; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 3950; CI-NEXT: s_mov_b32 s7, 0xf000 3951; CI-NEXT: s_mov_b32 s6, -1 3952; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:60 3953; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:64 3954; CI-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:48 3955; CI-NEXT: buffer_load_dword v35, off, s[0:3], s32 offset:52 3956; CI-NEXT: buffer_load_dword v36, off, s[0:3], s32 offset:56 3957; CI-NEXT: buffer_load_dword v37, off, s[0:3], s32 offset:36 3958; CI-NEXT: buffer_load_dword v38, off, s[0:3], s32 offset:40 3959; CI-NEXT: s_waitcnt vmcnt(7) 3960; CI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 3961; CI-NEXT: s_waitcnt vmcnt(0) 3962; CI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 3963; CI-NEXT: s_waitcnt vmcnt(0) 3964; CI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 3965; CI-NEXT: s_waitcnt vmcnt(0) 3966; CI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 3967; CI-NEXT: s_waitcnt vmcnt(0) 3968; CI-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:28 3969; CI-NEXT: buffer_load_dword v17, off, s[0:3], s32 offset:32 3970; CI-NEXT: buffer_load_dword v18, off, s[0:3], s32 offset:20 3971; CI-NEXT: buffer_load_dword 
v19, off, s[0:3], s32 offset:24 3972; CI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 3973; CI-NEXT: s_waitcnt vmcnt(0) 3974; CI-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:16 3975; CI-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:12 3976; CI-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:8 3977; CI-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:4 3978; CI-NEXT: buffer_load_dword v20, off, s[0:3], s32 offset:44 3979; CI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 3980; CI-NEXT: s_waitcnt vmcnt(0) 3981; CI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 3982; CI-NEXT: s_waitcnt vmcnt(0) 3983; CI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 3984; CI-NEXT: s_waitcnt vmcnt(0) 3985; CI-NEXT: buffer_store_byte v33, off, s[4:7], 0 3986; CI-NEXT: s_waitcnt vmcnt(0) 3987; CI-NEXT: buffer_store_byte v32, off, s[4:7], 0 3988; CI-NEXT: s_waitcnt vmcnt(0) 3989; CI-NEXT: buffer_store_byte v36, off, s[4:7], 0 3990; CI-NEXT: s_waitcnt vmcnt(0) 3991; CI-NEXT: buffer_store_byte v35, off, s[4:7], 0 3992; CI-NEXT: s_waitcnt vmcnt(0) 3993; CI-NEXT: buffer_store_byte v34, off, s[4:7], 0 3994; CI-NEXT: s_waitcnt vmcnt(0) 3995; CI-NEXT: buffer_store_byte v20, off, s[4:7], 0 3996; CI-NEXT: s_waitcnt vmcnt(0) 3997; CI-NEXT: buffer_store_byte v38, off, s[4:7], 0 3998; CI-NEXT: s_waitcnt vmcnt(0) 3999; CI-NEXT: buffer_store_byte v37, off, s[4:7], 0 4000; CI-NEXT: s_waitcnt vmcnt(0) 4001; CI-NEXT: buffer_store_byte v17, off, s[4:7], 0 4002; CI-NEXT: s_waitcnt vmcnt(0) 4003; CI-NEXT: buffer_store_byte v16, off, s[4:7], 0 4004; CI-NEXT: s_waitcnt vmcnt(0) 4005; CI-NEXT: buffer_store_byte v19, off, s[4:7], 0 4006; CI-NEXT: s_waitcnt vmcnt(0) 4007; CI-NEXT: buffer_store_byte v18, off, s[4:7], 0 4008; CI-NEXT: s_waitcnt vmcnt(0) 4009; CI-NEXT: buffer_store_byte v12, off, s[4:7], 0 4010; CI-NEXT: s_waitcnt vmcnt(0) 4011; CI-NEXT: buffer_store_byte v13, off, s[4:7], 0 4012; CI-NEXT: s_waitcnt vmcnt(0) 4013; CI-NEXT: buffer_store_byte v14, off, s[4:7], 
0 4014; CI-NEXT: s_waitcnt vmcnt(0) 4015; CI-NEXT: buffer_store_byte v15, off, s[4:7], 0 4016; CI-NEXT: s_waitcnt vmcnt(0) 4017; CI-NEXT: s_setpc_b64 s[30:31] 4018; 4019; VI-LABEL: void_func_v32i32_v16i8: 4020; VI: ; %bb.0: 4021; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4022; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 4023; VI-NEXT: s_mov_b32 s7, 0xf000 4024; VI-NEXT: s_mov_b32 s6, -1 4025; VI-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:60 4026; VI-NEXT: buffer_load_ubyte v33, off, s[0:3], s32 offset:64 4027; VI-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:48 4028; VI-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52 4029; VI-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56 4030; VI-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36 4031; VI-NEXT: buffer_load_ubyte v38, off, s[0:3], s32 offset:40 4032; VI-NEXT: s_waitcnt vmcnt(7) 4033; VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 4034; VI-NEXT: s_waitcnt vmcnt(0) 4035; VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 4036; VI-NEXT: s_waitcnt vmcnt(0) 4037; VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 4038; VI-NEXT: s_waitcnt vmcnt(0) 4039; VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 4040; VI-NEXT: s_waitcnt vmcnt(0) 4041; VI-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:28 4042; VI-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:32 4043; VI-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:20 4044; VI-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:24 4045; VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 4046; VI-NEXT: s_waitcnt vmcnt(0) 4047; VI-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:16 4048; VI-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:12 4049; VI-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:8 4050; VI-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:4 4051; VI-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44 4052; VI-NEXT: buffer_store_dwordx4 v[8:11], off, 
s[4:7], 0 4053; VI-NEXT: s_waitcnt vmcnt(0) 4054; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 4055; VI-NEXT: s_waitcnt vmcnt(0) 4056; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4057; VI-NEXT: s_waitcnt vmcnt(0) 4058; VI-NEXT: buffer_store_byte v33, off, s[4:7], 0 4059; VI-NEXT: s_waitcnt vmcnt(0) 4060; VI-NEXT: buffer_store_byte v32, off, s[4:7], 0 4061; VI-NEXT: s_waitcnt vmcnt(0) 4062; VI-NEXT: buffer_store_byte v36, off, s[4:7], 0 4063; VI-NEXT: s_waitcnt vmcnt(0) 4064; VI-NEXT: buffer_store_byte v35, off, s[4:7], 0 4065; VI-NEXT: s_waitcnt vmcnt(0) 4066; VI-NEXT: buffer_store_byte v34, off, s[4:7], 0 4067; VI-NEXT: s_waitcnt vmcnt(0) 4068; VI-NEXT: buffer_store_byte v20, off, s[4:7], 0 4069; VI-NEXT: s_waitcnt vmcnt(0) 4070; VI-NEXT: buffer_store_byte v38, off, s[4:7], 0 4071; VI-NEXT: s_waitcnt vmcnt(0) 4072; VI-NEXT: buffer_store_byte v37, off, s[4:7], 0 4073; VI-NEXT: s_waitcnt vmcnt(0) 4074; VI-NEXT: buffer_store_byte v17, off, s[4:7], 0 4075; VI-NEXT: s_waitcnt vmcnt(0) 4076; VI-NEXT: buffer_store_byte v16, off, s[4:7], 0 4077; VI-NEXT: s_waitcnt vmcnt(0) 4078; VI-NEXT: buffer_store_byte v19, off, s[4:7], 0 4079; VI-NEXT: s_waitcnt vmcnt(0) 4080; VI-NEXT: buffer_store_byte v18, off, s[4:7], 0 4081; VI-NEXT: s_waitcnt vmcnt(0) 4082; VI-NEXT: buffer_store_byte v12, off, s[4:7], 0 4083; VI-NEXT: s_waitcnt vmcnt(0) 4084; VI-NEXT: buffer_store_byte v13, off, s[4:7], 0 4085; VI-NEXT: s_waitcnt vmcnt(0) 4086; VI-NEXT: buffer_store_byte v14, off, s[4:7], 0 4087; VI-NEXT: s_waitcnt vmcnt(0) 4088; VI-NEXT: buffer_store_byte v15, off, s[4:7], 0 4089; VI-NEXT: s_waitcnt vmcnt(0) 4090; VI-NEXT: s_setpc_b64 s[30:31] 4091; 4092; GFX9-LABEL: void_func_v32i32_v16i8: 4093; GFX9: ; %bb.0: 4094; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4095; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 4096; GFX9-NEXT: s_mov_b32 s7, 0xf000 4097; GFX9-NEXT: s_mov_b32 s6, -1 4098; GFX9-NEXT: buffer_load_ubyte v32, off, s[0:3], s32 offset:60 4099; GFX9-NEXT: 
buffer_load_ubyte v33, off, s[0:3], s32 offset:64 4100; GFX9-NEXT: buffer_load_ubyte v34, off, s[0:3], s32 offset:48 4101; GFX9-NEXT: buffer_load_ubyte v35, off, s[0:3], s32 offset:52 4102; GFX9-NEXT: buffer_load_ubyte v36, off, s[0:3], s32 offset:56 4103; GFX9-NEXT: buffer_load_ubyte v37, off, s[0:3], s32 offset:36 4104; GFX9-NEXT: buffer_load_ubyte v38, off, s[0:3], s32 offset:40 4105; GFX9-NEXT: s_waitcnt vmcnt(7) 4106; GFX9-NEXT: buffer_store_dwordx4 v[28:31], off, s[4:7], 0 4107; GFX9-NEXT: s_waitcnt vmcnt(0) 4108; GFX9-NEXT: buffer_store_dwordx4 v[24:27], off, s[4:7], 0 4109; GFX9-NEXT: s_waitcnt vmcnt(0) 4110; GFX9-NEXT: buffer_store_dwordx4 v[20:23], off, s[4:7], 0 4111; GFX9-NEXT: s_waitcnt vmcnt(0) 4112; GFX9-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 4113; GFX9-NEXT: s_waitcnt vmcnt(0) 4114; GFX9-NEXT: buffer_load_ubyte v16, off, s[0:3], s32 offset:28 4115; GFX9-NEXT: buffer_load_ubyte v17, off, s[0:3], s32 offset:32 4116; GFX9-NEXT: buffer_load_ubyte v18, off, s[0:3], s32 offset:20 4117; GFX9-NEXT: buffer_load_ubyte v19, off, s[0:3], s32 offset:24 4118; GFX9-NEXT: buffer_load_ubyte v20, off, s[0:3], s32 offset:44 4119; GFX9-NEXT: s_nop 0 4120; GFX9-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 4121; GFX9-NEXT: s_waitcnt vmcnt(0) 4122; GFX9-NEXT: buffer_load_ubyte v12, off, s[0:3], s32 offset:16 4123; GFX9-NEXT: buffer_load_ubyte v13, off, s[0:3], s32 offset:12 4124; GFX9-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 offset:8 4125; GFX9-NEXT: buffer_load_ubyte v15, off, s[0:3], s32 offset:4 4126; GFX9-NEXT: s_nop 0 4127; GFX9-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 4128; GFX9-NEXT: s_waitcnt vmcnt(0) 4129; GFX9-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 4130; GFX9-NEXT: s_waitcnt vmcnt(0) 4131; GFX9-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4132; GFX9-NEXT: s_waitcnt vmcnt(0) 4133; GFX9-NEXT: buffer_store_byte v33, off, s[4:7], 0 4134; GFX9-NEXT: s_waitcnt vmcnt(0) 4135; GFX9-NEXT: buffer_store_byte v32, off, 
s[4:7], 0 4136; GFX9-NEXT: s_waitcnt vmcnt(0) 4137; GFX9-NEXT: buffer_store_byte v36, off, s[4:7], 0 4138; GFX9-NEXT: s_waitcnt vmcnt(0) 4139; GFX9-NEXT: buffer_store_byte v35, off, s[4:7], 0 4140; GFX9-NEXT: s_waitcnt vmcnt(0) 4141; GFX9-NEXT: buffer_store_byte v34, off, s[4:7], 0 4142; GFX9-NEXT: s_waitcnt vmcnt(0) 4143; GFX9-NEXT: buffer_store_byte v20, off, s[4:7], 0 4144; GFX9-NEXT: s_waitcnt vmcnt(0) 4145; GFX9-NEXT: buffer_store_byte v38, off, s[4:7], 0 4146; GFX9-NEXT: s_waitcnt vmcnt(0) 4147; GFX9-NEXT: buffer_store_byte v37, off, s[4:7], 0 4148; GFX9-NEXT: s_waitcnt vmcnt(0) 4149; GFX9-NEXT: buffer_store_byte v17, off, s[4:7], 0 4150; GFX9-NEXT: s_waitcnt vmcnt(0) 4151; GFX9-NEXT: buffer_store_byte v16, off, s[4:7], 0 4152; GFX9-NEXT: s_waitcnt vmcnt(0) 4153; GFX9-NEXT: buffer_store_byte v19, off, s[4:7], 0 4154; GFX9-NEXT: s_waitcnt vmcnt(0) 4155; GFX9-NEXT: buffer_store_byte v18, off, s[4:7], 0 4156; GFX9-NEXT: s_waitcnt vmcnt(0) 4157; GFX9-NEXT: buffer_store_byte v12, off, s[4:7], 0 4158; GFX9-NEXT: s_waitcnt vmcnt(0) 4159; GFX9-NEXT: buffer_store_byte v13, off, s[4:7], 0 4160; GFX9-NEXT: s_waitcnt vmcnt(0) 4161; GFX9-NEXT: buffer_store_byte v14, off, s[4:7], 0 4162; GFX9-NEXT: s_waitcnt vmcnt(0) 4163; GFX9-NEXT: buffer_store_byte v15, off, s[4:7], 0 4164; GFX9-NEXT: s_waitcnt vmcnt(0) 4165; GFX9-NEXT: s_setpc_b64 s[30:31] 4166; 4167; GFX11-LABEL: void_func_v32i32_v16i8: 4168; GFX11: ; %bb.0: 4169; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4170; GFX11-NEXT: s_clause 0x10 4171; GFX11-NEXT: scratch_load_b32 v31, off, s32 4172; GFX11-NEXT: scratch_load_u8 v32, off, s32 offset:64 4173; GFX11-NEXT: scratch_load_u8 v33, off, s32 offset:60 4174; GFX11-NEXT: scratch_load_u8 v34, off, s32 offset:56 4175; GFX11-NEXT: scratch_load_u8 v35, off, s32 offset:52 4176; GFX11-NEXT: scratch_load_u8 v36, off, s32 offset:48 4177; GFX11-NEXT: scratch_load_u8 v37, off, s32 offset:44 4178; GFX11-NEXT: scratch_load_u8 v38, off, s32 offset:40 4179; GFX11-NEXT: 
scratch_load_u8 v39, off, s32 offset:36 4180; GFX11-NEXT: scratch_load_u8 v48, off, s32 offset:32 4181; GFX11-NEXT: scratch_load_u8 v49, off, s32 offset:28 4182; GFX11-NEXT: scratch_load_u8 v50, off, s32 offset:24 4183; GFX11-NEXT: scratch_load_u8 v51, off, s32 offset:20 4184; GFX11-NEXT: scratch_load_u8 v52, off, s32 offset:16 4185; GFX11-NEXT: scratch_load_u8 v53, off, s32 offset:12 4186; GFX11-NEXT: scratch_load_u8 v54, off, s32 offset:8 4187; GFX11-NEXT: scratch_load_u8 v55, off, s32 offset:4 4188; GFX11-NEXT: s_mov_b32 s3, 0x31016000 4189; GFX11-NEXT: s_mov_b32 s2, -1 4190; GFX11-NEXT: s_waitcnt vmcnt(16) 4191; GFX11-NEXT: buffer_store_b128 v[28:31], off, s[0:3], 0 dlc 4192; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4193; GFX11-NEXT: buffer_store_b128 v[24:27], off, s[0:3], 0 dlc 4194; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4195; GFX11-NEXT: buffer_store_b128 v[20:23], off, s[0:3], 0 dlc 4196; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4197; GFX11-NEXT: buffer_store_b128 v[16:19], off, s[0:3], 0 dlc 4198; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4199; GFX11-NEXT: buffer_store_b128 v[12:15], off, s[0:3], 0 dlc 4200; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4201; GFX11-NEXT: buffer_store_b128 v[8:11], off, s[0:3], 0 dlc 4202; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4203; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 dlc 4204; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4205; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 dlc 4206; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4207; GFX11-NEXT: s_waitcnt vmcnt(15) 4208; GFX11-NEXT: buffer_store_b8 v32, off, s[0:3], 0 dlc 4209; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4210; GFX11-NEXT: s_waitcnt vmcnt(14) 4211; GFX11-NEXT: buffer_store_b8 v33, off, s[0:3], 0 dlc 4212; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4213; GFX11-NEXT: s_waitcnt vmcnt(13) 4214; GFX11-NEXT: buffer_store_b8 v34, off, s[0:3], 0 dlc 4215; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4216; GFX11-NEXT: s_waitcnt vmcnt(12) 4217; GFX11-NEXT: buffer_store_b8 v35, off, s[0:3], 0 
dlc 4218; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4219; GFX11-NEXT: s_waitcnt vmcnt(11) 4220; GFX11-NEXT: buffer_store_b8 v36, off, s[0:3], 0 dlc 4221; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4222; GFX11-NEXT: s_waitcnt vmcnt(10) 4223; GFX11-NEXT: buffer_store_b8 v37, off, s[0:3], 0 dlc 4224; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4225; GFX11-NEXT: s_waitcnt vmcnt(9) 4226; GFX11-NEXT: buffer_store_b8 v38, off, s[0:3], 0 dlc 4227; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4228; GFX11-NEXT: s_waitcnt vmcnt(8) 4229; GFX11-NEXT: buffer_store_b8 v39, off, s[0:3], 0 dlc 4230; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4231; GFX11-NEXT: s_waitcnt vmcnt(7) 4232; GFX11-NEXT: buffer_store_b8 v48, off, s[0:3], 0 dlc 4233; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4234; GFX11-NEXT: s_waitcnt vmcnt(6) 4235; GFX11-NEXT: buffer_store_b8 v49, off, s[0:3], 0 dlc 4236; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4237; GFX11-NEXT: s_waitcnt vmcnt(5) 4238; GFX11-NEXT: buffer_store_b8 v50, off, s[0:3], 0 dlc 4239; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4240; GFX11-NEXT: s_waitcnt vmcnt(4) 4241; GFX11-NEXT: buffer_store_b8 v51, off, s[0:3], 0 dlc 4242; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4243; GFX11-NEXT: s_waitcnt vmcnt(3) 4244; GFX11-NEXT: buffer_store_b8 v52, off, s[0:3], 0 dlc 4245; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4246; GFX11-NEXT: s_waitcnt vmcnt(2) 4247; GFX11-NEXT: buffer_store_b8 v53, off, s[0:3], 0 dlc 4248; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4249; GFX11-NEXT: s_waitcnt vmcnt(1) 4250; GFX11-NEXT: buffer_store_b8 v54, off, s[0:3], 0 dlc 4251; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4252; GFX11-NEXT: s_waitcnt vmcnt(0) 4253; GFX11-NEXT: buffer_store_b8 v55, off, s[0:3], 0 dlc 4254; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4255; GFX11-NEXT: s_setpc_b64 s[30:31] 4256 store volatile <32 x i32> %arg0, ptr addrspace(1) undef 4257 store volatile <16 x i8> %arg1, ptr addrspace(1) undef 4258 ret void 4259} 4260 4261 4262define void @void_func_bf16(bfloat %arg0) #0 { 4263; CI-LABEL: void_func_bf16: 4264; 
CI: ; %bb.0: 4265; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4266; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 4267; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4268; CI-NEXT: s_mov_b32 s7, 0xf000 4269; CI-NEXT: s_mov_b32 s6, -1 4270; CI-NEXT: buffer_store_short v0, off, s[4:7], 0 4271; CI-NEXT: s_waitcnt vmcnt(0) 4272; CI-NEXT: s_setpc_b64 s[30:31] 4273; 4274; GFX89-LABEL: void_func_bf16: 4275; GFX89: ; %bb.0: 4276; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4277; GFX89-NEXT: s_mov_b32 s7, 0xf000 4278; GFX89-NEXT: s_mov_b32 s6, -1 4279; GFX89-NEXT: buffer_store_short v0, off, s[4:7], 0 4280; GFX89-NEXT: s_waitcnt vmcnt(0) 4281; GFX89-NEXT: s_setpc_b64 s[30:31] 4282; 4283; GFX11-LABEL: void_func_bf16: 4284; GFX11: ; %bb.0: 4285; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4286; GFX11-NEXT: s_mov_b32 s3, 0x31016000 4287; GFX11-NEXT: s_mov_b32 s2, -1 4288; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 4289; GFX11-NEXT: s_setpc_b64 s[30:31] 4290 store bfloat %arg0, ptr addrspace(1) undef 4291 ret void 4292} 4293 4294define void @void_func_v2bf16(<2 x bfloat> %arg0) #0 { 4295; CI-LABEL: void_func_v2bf16: 4296; CI: ; %bb.0: 4297; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4298; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1 4299; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4300; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 4301; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16 4302; CI-NEXT: s_mov_b32 s7, 0xf000 4303; CI-NEXT: s_mov_b32 s6, -1 4304; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 4305; CI-NEXT: s_waitcnt vmcnt(0) 4306; CI-NEXT: s_setpc_b64 s[30:31] 4307; 4308; GFX89-LABEL: void_func_v2bf16: 4309; GFX89: ; %bb.0: 4310; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4311; GFX89-NEXT: s_mov_b32 s7, 0xf000 4312; GFX89-NEXT: s_mov_b32 s6, -1 4313; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 4314; GFX89-NEXT: s_waitcnt vmcnt(0) 4315; GFX89-NEXT: s_setpc_b64 s[30:31] 4316; 4317; GFX11-LABEL: void_func_v2bf16: 4318; GFX11: ; %bb.0: 4319; GFX11-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 4320; GFX11-NEXT: s_mov_b32 s3, 0x31016000 4321; GFX11-NEXT: s_mov_b32 s2, -1 4322; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 4323; GFX11-NEXT: s_setpc_b64 s[30:31] 4324 store <2 x bfloat> %arg0, ptr addrspace(1) undef 4325 ret void 4326} 4327 4328define void @void_func_v3bf16(<3 x bfloat> %arg0) #0 { 4329; CI-LABEL: void_func_v3bf16: 4330; CI: ; %bb.0: 4331; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4332; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1 4333; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4334; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 4335; CI-NEXT: v_alignbit_b32 v0, v1, v0, 16 4336; CI-NEXT: v_mul_f32_e32 v1, 1.0, v2 4337; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4338; CI-NEXT: s_mov_b32 s7, 0xf000 4339; CI-NEXT: s_mov_b32 s6, -1 4340; CI-NEXT: buffer_store_short v1, off, s[4:7], 0 4341; CI-NEXT: buffer_store_dword v0, off, s[4:7], 0 4342; CI-NEXT: s_waitcnt vmcnt(0) 4343; CI-NEXT: s_setpc_b64 s[30:31] 4344; 4345; GFX89-LABEL: void_func_v3bf16: 4346; GFX89: ; %bb.0: 4347; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4348; GFX89-NEXT: s_mov_b32 s7, 0xf000 4349; GFX89-NEXT: s_mov_b32 s6, -1 4350; GFX89-NEXT: buffer_store_short v1, off, s[4:7], 0 4351; GFX89-NEXT: buffer_store_dword v0, off, s[4:7], 0 4352; GFX89-NEXT: s_waitcnt vmcnt(0) 4353; GFX89-NEXT: s_setpc_b64 s[30:31] 4354; 4355; GFX11-LABEL: void_func_v3bf16: 4356; GFX11: ; %bb.0: 4357; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4358; GFX11-NEXT: s_mov_b32 s3, 0x31016000 4359; GFX11-NEXT: s_mov_b32 s2, -1 4360; GFX11-NEXT: s_clause 0x1 4361; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0 4362; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0 4363; GFX11-NEXT: s_setpc_b64 s[30:31] 4364 store <3 x bfloat> %arg0, ptr addrspace(1) undef 4365 ret void 4366} 4367 4368define void @void_func_v4bf16(<4 x bfloat> %arg0) #0 { 4369; CI-LABEL: void_func_v4bf16: 4370; CI: ; %bb.0: 4371; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4372; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3 4373; 
CI-NEXT: v_mul_f32_e32 v1, 1.0, v1 4374; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 4375; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2 4376; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4377; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 4378; CI-NEXT: v_alignbit_b32 v2, v3, v2, 16 4379; CI-NEXT: v_alignbit_b32 v1, v1, v0, 16 4380; CI-NEXT: s_mov_b32 s7, 0xf000 4381; CI-NEXT: s_mov_b32 s6, -1 4382; CI-NEXT: buffer_store_dwordx2 v[1:2], off, s[4:7], 0 4383; CI-NEXT: s_waitcnt vmcnt(0) 4384; CI-NEXT: s_setpc_b64 s[30:31] 4385; 4386; GFX89-LABEL: void_func_v4bf16: 4387; GFX89: ; %bb.0: 4388; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4389; GFX89-NEXT: s_mov_b32 s7, 0xf000 4390; GFX89-NEXT: s_mov_b32 s6, -1 4391; GFX89-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4392; GFX89-NEXT: s_waitcnt vmcnt(0) 4393; GFX89-NEXT: s_setpc_b64 s[30:31] 4394; 4395; GFX11-LABEL: void_func_v4bf16: 4396; GFX11: ; %bb.0: 4397; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4398; GFX11-NEXT: s_mov_b32 s3, 0x31016000 4399; GFX11-NEXT: s_mov_b32 s2, -1 4400; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 4401; GFX11-NEXT: s_setpc_b64 s[30:31] 4402 store <4 x bfloat> %arg0, ptr addrspace(1) undef 4403 ret void 4404} 4405 4406define void @void_func_v8bf16(<8 x bfloat> %arg0) #0 { 4407; CI-LABEL: void_func_v8bf16: 4408; CI: ; %bb.0: 4409; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4410; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7 4411; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5 4412; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3 4413; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1 4414; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7 4415; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6 4416; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 4417; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4 4418; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 4419; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2 4420; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4421; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 4422; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16 4423; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16 4424; CI-NEXT: v_alignbit_b32 v4, 
v3, v2, 16 4425; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16 4426; CI-NEXT: s_mov_b32 s7, 0xf000 4427; CI-NEXT: s_mov_b32 s6, -1 4428; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 4429; CI-NEXT: s_waitcnt vmcnt(0) 4430; CI-NEXT: s_setpc_b64 s[30:31] 4431; 4432; GFX89-LABEL: void_func_v8bf16: 4433; GFX89: ; %bb.0: 4434; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4435; GFX89-NEXT: s_mov_b32 s7, 0xf000 4436; GFX89-NEXT: s_mov_b32 s6, -1 4437; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4438; GFX89-NEXT: s_waitcnt vmcnt(0) 4439; GFX89-NEXT: s_setpc_b64 s[30:31] 4440; 4441; GFX11-LABEL: void_func_v8bf16: 4442; GFX11: ; %bb.0: 4443; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4444; GFX11-NEXT: s_mov_b32 s3, 0x31016000 4445; GFX11-NEXT: s_mov_b32 s2, -1 4446; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 4447; GFX11-NEXT: s_setpc_b64 s[30:31] 4448 store <8 x bfloat> %arg0, ptr addrspace(1) undef 4449 ret void 4450} 4451 4452define void @void_func_v16bf16(<16 x bfloat> %arg0) #0 { 4453; CI-LABEL: void_func_v16bf16: 4454; CI: ; %bb.0: 4455; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4456; CI-NEXT: v_mul_f32_e32 v5, 1.0, v5 4457; CI-NEXT: v_mul_f32_e32 v3, 1.0, v3 4458; CI-NEXT: v_mul_f32_e32 v1, 1.0, v1 4459; CI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 4460; CI-NEXT: v_mul_f32_e32 v4, 1.0, v4 4461; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 4462; CI-NEXT: v_mul_f32_e32 v2, 1.0, v2 4463; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4464; CI-NEXT: v_mul_f32_e32 v0, 1.0, v0 4465; CI-NEXT: v_alignbit_b32 v5, v5, v4, 16 4466; CI-NEXT: v_alignbit_b32 v4, v3, v2, 16 4467; CI-NEXT: v_alignbit_b32 v3, v1, v0, 16 4468; CI-NEXT: v_mul_f32_e32 v0, 1.0, v15 4469; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4470; CI-NEXT: v_mul_f32_e32 v1, 1.0, v14 4471; CI-NEXT: v_alignbit_b32 v14, v0, v1, 16 4472; CI-NEXT: v_mul_f32_e32 v0, 1.0, v13 4473; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4474; CI-NEXT: v_mul_f32_e32 v1, 1.0, v12 4475; CI-NEXT: v_alignbit_b32 v13, v0, v1, 16 
4476; CI-NEXT: v_mul_f32_e32 v0, 1.0, v11 4477; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4478; CI-NEXT: v_mul_f32_e32 v1, 1.0, v10 4479; CI-NEXT: v_alignbit_b32 v12, v0, v1, 16 4480; CI-NEXT: v_mul_f32_e32 v0, 1.0, v9 4481; CI-NEXT: v_mul_f32_e32 v7, 1.0, v7 4482; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 4483; CI-NEXT: v_mul_f32_e32 v1, 1.0, v8 4484; CI-NEXT: v_lshrrev_b32_e32 v7, 16, v7 4485; CI-NEXT: v_mul_f32_e32 v6, 1.0, v6 4486; CI-NEXT: v_alignbit_b32 v11, v0, v1, 16 4487; CI-NEXT: s_mov_b32 s7, 0xf000 4488; CI-NEXT: s_mov_b32 s6, -1 4489; CI-NEXT: v_alignbit_b32 v6, v7, v6, 16 4490; CI-NEXT: buffer_store_dwordx4 v[11:14], off, s[4:7], 0 4491; CI-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 4492; CI-NEXT: s_waitcnt vmcnt(0) 4493; CI-NEXT: s_setpc_b64 s[30:31] 4494; 4495; GFX89-LABEL: void_func_v16bf16: 4496; GFX89: ; %bb.0: 4497; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4498; GFX89-NEXT: s_mov_b32 s7, 0xf000 4499; GFX89-NEXT: s_mov_b32 s6, -1 4500; GFX89-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 4501; GFX89-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4502; GFX89-NEXT: s_waitcnt vmcnt(0) 4503; GFX89-NEXT: s_setpc_b64 s[30:31] 4504; 4505; GFX11-LABEL: void_func_v16bf16: 4506; GFX11: ; %bb.0: 4507; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4508; GFX11-NEXT: s_mov_b32 s3, 0x31016000 4509; GFX11-NEXT: s_mov_b32 s2, -1 4510; GFX11-NEXT: s_clause 0x1 4511; GFX11-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 4512; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 4513; GFX11-NEXT: s_setpc_b64 s[30:31] 4514 store <16 x bfloat> %arg0, ptr addrspace(1) undef 4515 ret void 4516} 4517 4518attributes #0 = { nounwind } 4519