; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s

; Each function below takes a single `inreg` argument of one scalar or vector
; type and stores it to a global (addrspace(1)) pointer. The checks record, for
; each of the two subtargets in the RUN lines, which registers the argument is
; read from and how it is copied into VGPRs for the store.
;
; The check lines are generated: rerun utils/update_llc_test_checks.py instead
; of editing them by hand. Only the two prefixes enabled by the RUN lines are
; kept; blocks for prefixes that no RUN line enables are never verified.

define void @void_func_i1_inreg(i1 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i1_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_and_b32 s4, s16, 1
; GFX9-NEXT:    v_mov_b32_e32 v0, s4
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i1_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_and_b32 s0, s0, 1
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i1 %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_i8_inreg(i8 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i8_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_byte v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i8_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i8 %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_i16_inreg(i16 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_short v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i16 %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_i32_inreg(i32 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i32 %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_i64_inreg(i64 inreg %arg0) #0 {
; GFX9-LABEL: void_func_i64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store i64 %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_f16_inreg(half inreg %arg0) #0 {
; GFX9-LABEL: void_func_f16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_short v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store half %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_f32_inreg(float inreg %arg0) #0 {
; GFX9-LABEL: void_func_f32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store float %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_f64_inreg(double inreg %arg0) #0 {
; GFX9-LABEL: void_func_f64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_f64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store double %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v3i16_inreg(<3 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s17
; GFX9-NEXT:    global_store_short v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
; GFX11-NEXT:    global_store_b32 v[0:1], v1, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <3 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v4i16_inreg(<4 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <4 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v5i16_inreg(<5 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s18
; GFX9-NEXT:    global_store_short v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <5 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v8i16_inreg(<8 x i16> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <8 x i16> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v3i32_inreg(<3 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    global_store_dwordx3 v[0:1], v[0:2], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_mov_b32_e32 v2, s2
; GFX11-NEXT:    global_store_b96 v[0:1], v[0:2], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <3 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v4i32_inreg(<4 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <4 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v5i32_inreg(<5 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s20
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    v_mov_b32_e32 v2, s2
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b32 v[0:1], v4, off
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <5 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v8i32_inreg(<8 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s20
; GFX9-NEXT:    v_mov_b32_e32 v1, s21
; GFX9-NEXT:    v_mov_b32_e32 v2, s22
; GFX9-NEXT:    v_mov_b32_e32 v3, s23
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <8 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v16i32_inreg(<16 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, v1
; GFX9-NEXT:    v_mov_b32_e32 v2, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s28
; GFX9-NEXT:    v_mov_b32_e32 v1, s29
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s24
; GFX9-NEXT:    v_mov_b32_e32 v1, s25
; GFX9-NEXT:    v_mov_b32_e32 v2, s26
; GFX9-NEXT:    v_mov_b32_e32 v3, s27
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s20
; GFX9-NEXT:    v_mov_b32_e32 v1, s21
; GFX9-NEXT:    v_mov_b32_e32 v2, s22
; GFX9-NEXT:    v_mov_b32_e32 v3, s23
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT:    v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT:    v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <16 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v32i32_inreg(<32 x i32> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v32i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v19, v1
; GFX9-NEXT:    v_mov_b32_e32 v18, v0
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT:    v_mov_b32_e32 v16, s28
; GFX9-NEXT:    v_mov_b32_e32 v17, s29
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[16:19], off
; GFX9-NEXT:    v_mov_b32_e32 v0, s24
; GFX9-NEXT:    v_mov_b32_e32 v1, s25
; GFX9-NEXT:    v_mov_b32_e32 v2, s26
; GFX9-NEXT:    v_mov_b32_e32 v3, s27
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s20
; GFX9-NEXT:    v_mov_b32_e32 v1, s21
; GFX9-NEXT:    v_mov_b32_e32 v2, s22
; GFX9-NEXT:    v_mov_b32_e32 v3, s23
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v32i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT:    v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29
; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT:    v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; GFX11-NEXT:    v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; GFX11-NEXT:    s_clause 0x4
; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[16:19], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <32 x i32> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2i64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2i64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v3i64_inreg(<3 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3i64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s20
; GFX9-NEXT:    v_mov_b32_e32 v1, s21
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3i64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b64 v[0:1], v[4:5], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <3 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v4i64_inreg(<4 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4i64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s20
; GFX9-NEXT:    v_mov_b32_e32 v1, s21
; GFX9-NEXT:    v_mov_b32_e32 v2, s22
; GFX9-NEXT:    v_mov_b32_e32 v3, s23
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4i64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <4 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v5i64_inreg(<5 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v5i64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s20
; GFX9-NEXT:    v_mov_b32_e32 v1, s21
; GFX9-NEXT:    v_mov_b32_e32 v2, s22
; GFX9-NEXT:    v_mov_b32_e32 v3, s23
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s24
; GFX9-NEXT:    v_mov_b32_e32 v1, s25
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v5i64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT:    v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT:    global_store_b64 v[0:1], v[8:9], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <5 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v8i64_inreg(<8 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8i64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, v1
; GFX9-NEXT:    v_mov_b32_e32 v2, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s28
; GFX9-NEXT:    v_mov_b32_e32 v1, s29
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s24
; GFX9-NEXT:    v_mov_b32_e32 v1, s25
; GFX9-NEXT:    v_mov_b32_e32 v2, s26
; GFX9-NEXT:    v_mov_b32_e32 v3, s27
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s20
; GFX9-NEXT:    v_mov_b32_e32 v1, s21
; GFX9-NEXT:    v_mov_b32_e32 v2, s22
; GFX9-NEXT:    v_mov_b32_e32 v3, s23
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8i64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT:    v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
; GFX11-NEXT:    v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <8 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v16i64_inreg(<16 x i64> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16i64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v19, v1
; GFX9-NEXT:    v_mov_b32_e32 v18, v0
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[14:17], off
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[6:9], off
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT:    v_mov_b32_e32 v16, s28
; GFX9-NEXT:    v_mov_b32_e32 v17, s29
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[16:19], off
; GFX9-NEXT:    v_mov_b32_e32 v0, s24
; GFX9-NEXT:    v_mov_b32_e32 v1, s25
; GFX9-NEXT:    v_mov_b32_e32 v2, s26
; GFX9-NEXT:    v_mov_b32_e32 v3, s27
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s20
; GFX9-NEXT:    v_mov_b32_e32 v1, s21
; GFX9-NEXT:    v_mov_b32_e32 v2, s22
; GFX9-NEXT:    v_mov_b32_e32 v3, s23
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16i64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT:    v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29
; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
; GFX11-NEXT:    v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
; GFX11-NEXT:    v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
; GFX11-NEXT:    s_clause 0x4
; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[16:19], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <16 x i64> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2f16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x half> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v3f16_inreg(<3 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3f16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s17
; GFX9-NEXT:    global_store_short v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
; GFX11-NEXT:    global_store_b32 v[0:1], v1, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <3 x half> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v4f16_inreg(<4 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4f16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <4 x half> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v8f16_inreg(<8 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8f16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8f16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <8 x half> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v16f16_inreg(<16 x half> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16f16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s20
; GFX9-NEXT:    v_mov_b32_e32 v1, s21
; GFX9-NEXT:    v_mov_b32_e32 v2, s22
; GFX9-NEXT:    v_mov_b32_e32 v3, s23
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16f16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <16 x half> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2f32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2f32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x float> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v3f32_inreg(<3 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3f32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    global_store_dwordx3 v[0:1], v[0:2], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3f32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_mov_b32_e32 v2, s2
; GFX11-NEXT:    global_store_b96 v[0:1], v[0:2], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <3 x float> %arg0, ptr addrspace(1) undef
  ret void
}

define void @void_func_v4f32_inreg(<4 x float> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4f32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4f32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
store <4 x float> %arg0, ptr addrspace(1) undef 964 ret void 965} 966 967define void @void_func_v8f32_inreg(<8 x float> inreg %arg0) #0 { 968; GFX9-LABEL: void_func_v8f32_inreg: 969; GFX9: ; %bb.0: 970; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 971; GFX9-NEXT: v_mov_b32_e32 v0, s20 972; GFX9-NEXT: v_mov_b32_e32 v1, s21 973; GFX9-NEXT: v_mov_b32_e32 v2, s22 974; GFX9-NEXT: v_mov_b32_e32 v3, s23 975; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 976; GFX9-NEXT: s_nop 0 977; GFX9-NEXT: v_mov_b32_e32 v0, s16 978; GFX9-NEXT: v_mov_b32_e32 v1, s17 979; GFX9-NEXT: v_mov_b32_e32 v2, s18 980; GFX9-NEXT: v_mov_b32_e32 v3, s19 981; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 982; GFX9-NEXT: s_waitcnt vmcnt(0) 983; GFX9-NEXT: s_setpc_b64 s[30:31] 984; 985; GFX11-LABEL: void_func_v8f32_inreg: 986; GFX11: ; %bb.0: 987; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 988; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 989; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 990; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 991; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 992; GFX11-NEXT: s_clause 0x1 993; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off 994; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off 995; GFX11-NEXT: s_setpc_b64 s[30:31] 996 store <8 x float> %arg0, ptr addrspace(1) undef 997 ret void 998} 999 1000define void @void_func_v16f32_inreg(<16 x float> inreg %arg0) #0 { 1001; GFX9-LABEL: void_func_v16f32_inreg: 1002; GFX9: ; %bb.0: 1003; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1004; GFX9-NEXT: v_mov_b32_e32 v3, v1 1005; GFX9-NEXT: v_mov_b32_e32 v2, v0 1006; GFX9-NEXT: v_mov_b32_e32 v0, s28 1007; GFX9-NEXT: v_mov_b32_e32 v1, s29 1008; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1009; GFX9-NEXT: s_nop 0 1010; GFX9-NEXT: v_mov_b32_e32 v0, s24 1011; GFX9-NEXT: v_mov_b32_e32 v1, s25 1012; GFX9-NEXT: v_mov_b32_e32 v2, s26 1013; GFX9-NEXT: v_mov_b32_e32 v3, s27 1014; 
GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1015; GFX9-NEXT: s_nop 0 1016; GFX9-NEXT: v_mov_b32_e32 v0, s20 1017; GFX9-NEXT: v_mov_b32_e32 v1, s21 1018; GFX9-NEXT: v_mov_b32_e32 v2, s22 1019; GFX9-NEXT: v_mov_b32_e32 v3, s23 1020; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1021; GFX9-NEXT: s_nop 0 1022; GFX9-NEXT: v_mov_b32_e32 v0, s16 1023; GFX9-NEXT: v_mov_b32_e32 v1, s17 1024; GFX9-NEXT: v_mov_b32_e32 v2, s18 1025; GFX9-NEXT: v_mov_b32_e32 v3, s19 1026; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1027; GFX9-NEXT: s_waitcnt vmcnt(0) 1028; GFX9-NEXT: s_setpc_b64 s[30:31] 1029; 1030; GFX11-LABEL: void_func_v16f32_inreg: 1031; GFX11: ; %bb.0: 1032; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1033; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 1034; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 1035; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 1036; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 1037; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 1038; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 1039; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1 1040; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3 1041; GFX11-NEXT: s_clause 0x3 1042; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off 1043; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off 1044; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off 1045; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off 1046; GFX11-NEXT: s_setpc_b64 s[30:31] 1047 store <16 x float> %arg0, ptr addrspace(1) undef 1048 ret void 1049} 1050 1051define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 { 1052; GFX9-LABEL: void_func_v2f64_inreg: 1053; GFX9: ; %bb.0: 1054; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1055; GFX9-NEXT: v_mov_b32_e32 v0, s16 1056; GFX9-NEXT: v_mov_b32_e32 v1, s17 1057; GFX9-NEXT: v_mov_b32_e32 v2, s18 1058; GFX9-NEXT: v_mov_b32_e32 v3, s19 1059; 
GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1060; GFX9-NEXT: s_waitcnt vmcnt(0) 1061; GFX9-NEXT: s_setpc_b64 s[30:31] 1062; 1063; GFX11-LABEL: void_func_v2f64_inreg: 1064; GFX11: ; %bb.0: 1065; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1066; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1067; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 1068; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off 1069; GFX11-NEXT: s_setpc_b64 s[30:31] 1070 store <2 x double> %arg0, ptr addrspace(1) undef 1071 ret void 1072} 1073 1074define void @void_func_v3f64_inreg(<3 x double> inreg %arg0) #0 { 1075; GFX9-LABEL: void_func_v3f64_inreg: 1076; GFX9: ; %bb.0: 1077; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1078; GFX9-NEXT: v_mov_b32_e32 v0, s20 1079; GFX9-NEXT: v_mov_b32_e32 v1, s21 1080; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1081; GFX9-NEXT: v_mov_b32_e32 v0, s16 1082; GFX9-NEXT: v_mov_b32_e32 v1, s17 1083; GFX9-NEXT: v_mov_b32_e32 v2, s18 1084; GFX9-NEXT: v_mov_b32_e32 v3, s19 1085; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1086; GFX9-NEXT: s_waitcnt vmcnt(0) 1087; GFX9-NEXT: s_setpc_b64 s[30:31] 1088; 1089; GFX11-LABEL: void_func_v3f64_inreg: 1090; GFX11: ; %bb.0: 1091; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1092; GFX11-NEXT: v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17 1093; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 1094; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 1095; GFX11-NEXT: s_clause 0x1 1096; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off 1097; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off 1098; GFX11-NEXT: s_setpc_b64 s[30:31] 1099 store <3 x double> %arg0, ptr addrspace(1) undef 1100 ret void 1101} 1102 1103define void @void_func_v4f64_inreg(<4 x double> inreg %arg0) #0 { 1104; GFX9-LABEL: void_func_v4f64_inreg: 1105; GFX9: ; %bb.0: 1106; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1107; GFX9-NEXT: v_mov_b32_e32 v0, s20 1108; 
GFX9-NEXT: v_mov_b32_e32 v1, s21 1109; GFX9-NEXT: v_mov_b32_e32 v2, s22 1110; GFX9-NEXT: v_mov_b32_e32 v3, s23 1111; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1112; GFX9-NEXT: s_nop 0 1113; GFX9-NEXT: v_mov_b32_e32 v0, s16 1114; GFX9-NEXT: v_mov_b32_e32 v1, s17 1115; GFX9-NEXT: v_mov_b32_e32 v2, s18 1116; GFX9-NEXT: v_mov_b32_e32 v3, s19 1117; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1118; GFX9-NEXT: s_waitcnt vmcnt(0) 1119; GFX9-NEXT: s_setpc_b64 s[30:31] 1120; 1121; GFX11-LABEL: void_func_v4f64_inreg: 1122; GFX11: ; %bb.0: 1123; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1124; GFX11-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 1125; GFX11-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 1126; GFX11-NEXT: v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1 1127; GFX11-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 1128; GFX11-NEXT: s_clause 0x1 1129; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off 1130; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off 1131; GFX11-NEXT: s_setpc_b64 s[30:31] 1132 store <4 x double> %arg0, ptr addrspace(1) undef 1133 ret void 1134} 1135 1136define void @void_func_v8f64_inreg(<8 x double> inreg %arg0) #0 { 1137; GFX9-LABEL: void_func_v8f64_inreg: 1138; GFX9: ; %bb.0: 1139; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1140; GFX9-NEXT: v_mov_b32_e32 v3, v1 1141; GFX9-NEXT: v_mov_b32_e32 v2, v0 1142; GFX9-NEXT: v_mov_b32_e32 v0, s28 1143; GFX9-NEXT: v_mov_b32_e32 v1, s29 1144; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1145; GFX9-NEXT: s_nop 0 1146; GFX9-NEXT: v_mov_b32_e32 v0, s24 1147; GFX9-NEXT: v_mov_b32_e32 v1, s25 1148; GFX9-NEXT: v_mov_b32_e32 v2, s26 1149; GFX9-NEXT: v_mov_b32_e32 v3, s27 1150; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1151; GFX9-NEXT: s_nop 0 1152; GFX9-NEXT: v_mov_b32_e32 v0, s20 1153; GFX9-NEXT: v_mov_b32_e32 v1, s21 1154; GFX9-NEXT: v_mov_b32_e32 v2, s22 1155; GFX9-NEXT: v_mov_b32_e32 v3, s23 1156; GFX9-NEXT: global_store_dwordx4 
v[0:1], v[0:3], off 1157; GFX9-NEXT: s_nop 0 1158; GFX9-NEXT: v_mov_b32_e32 v0, s16 1159; GFX9-NEXT: v_mov_b32_e32 v1, s17 1160; GFX9-NEXT: v_mov_b32_e32 v2, s18 1161; GFX9-NEXT: v_mov_b32_e32 v3, s19 1162; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1163; GFX9-NEXT: s_waitcnt vmcnt(0) 1164; GFX9-NEXT: s_setpc_b64 s[30:31] 1165; 1166; GFX11-LABEL: void_func_v8f64_inreg: 1167; GFX11: ; %bb.0: 1168; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1169; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 1170; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 1171; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 1172; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 1173; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 1174; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 1175; GFX11-NEXT: v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1 1176; GFX11-NEXT: v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3 1177; GFX11-NEXT: s_clause 0x3 1178; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off 1179; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off 1180; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off 1181; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off 1182; GFX11-NEXT: s_setpc_b64 s[30:31] 1183 store <8 x double> %arg0, ptr addrspace(1) undef 1184 ret void 1185} 1186 1187define void @void_func_v16f64_inreg(<16 x double> inreg %arg0) #0 { 1188; GFX9-LABEL: void_func_v16f64_inreg: 1189; GFX9: ; %bb.0: 1190; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1191; GFX9-NEXT: v_mov_b32_e32 v19, v1 1192; GFX9-NEXT: v_mov_b32_e32 v18, v0 1193; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off 1194; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off 1195; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off 1196; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1197; GFX9-NEXT: v_mov_b32_e32 v16, s28 1198; GFX9-NEXT: v_mov_b32_e32 v17, s29 1199; GFX9-NEXT: global_store_dwordx4 
v[0:1], v[16:19], off 1200; GFX9-NEXT: v_mov_b32_e32 v0, s24 1201; GFX9-NEXT: v_mov_b32_e32 v1, s25 1202; GFX9-NEXT: v_mov_b32_e32 v2, s26 1203; GFX9-NEXT: v_mov_b32_e32 v3, s27 1204; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1205; GFX9-NEXT: s_nop 0 1206; GFX9-NEXT: v_mov_b32_e32 v0, s20 1207; GFX9-NEXT: v_mov_b32_e32 v1, s21 1208; GFX9-NEXT: v_mov_b32_e32 v2, s22 1209; GFX9-NEXT: v_mov_b32_e32 v3, s23 1210; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1211; GFX9-NEXT: s_nop 0 1212; GFX9-NEXT: v_mov_b32_e32 v0, s16 1213; GFX9-NEXT: v_mov_b32_e32 v1, s17 1214; GFX9-NEXT: v_mov_b32_e32 v2, s18 1215; GFX9-NEXT: v_mov_b32_e32 v3, s19 1216; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1217; GFX9-NEXT: s_waitcnt vmcnt(0) 1218; GFX9-NEXT: s_setpc_b64 s[30:31] 1219; 1220; GFX11-LABEL: void_func_v16f64_inreg: 1221; GFX11: ; %bb.0: 1222; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1223; GFX11-NEXT: v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0 1224; GFX11-NEXT: s_clause 0x2 1225; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off 1226; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off 1227; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off 1228; GFX11-NEXT: v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29 1229; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 1230; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 1231; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 1232; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 1233; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 1234; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 1235; GFX11-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1 1236; GFX11-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3 1237; GFX11-NEXT: s_clause 0x4 1238; GFX11-NEXT: global_store_b128 v[0:1], v[12:15], off 1239; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off 1240; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off 1241; 
GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off 1242; GFX11-NEXT: global_store_b128 v[0:1], v[16:19], off 1243; GFX11-NEXT: s_setpc_b64 s[30:31] 1244 store <16 x double> %arg0, ptr addrspace(1) undef 1245 ret void 1246} 1247 1248define void @void_func_v32i32_i1_i8_i16_f32_inreg(<32 x i32> inreg %arg0, i1 inreg %arg1, i8 inreg %arg2, i16 inreg %arg3, half inreg %arg4) #0 { 1249; GFX9-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg: 1250; GFX9: ; %bb.0: 1251; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1252; GFX9-NEXT: v_mov_b32_e32 v25, v1 1253; GFX9-NEXT: v_mov_b32_e32 v24, v0 1254; GFX9-NEXT: v_mov_b32_e32 v22, s28 1255; GFX9-NEXT: v_mov_b32_e32 v23, s29 1256; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off 1257; GFX9-NEXT: s_waitcnt vmcnt(0) 1258; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off 1259; GFX9-NEXT: s_waitcnt vmcnt(0) 1260; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off 1261; GFX9-NEXT: s_waitcnt vmcnt(0) 1262; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1263; GFX9-NEXT: s_waitcnt vmcnt(0) 1264; GFX9-NEXT: global_store_dwordx4 v[0:1], v[22:25], off 1265; GFX9-NEXT: s_waitcnt vmcnt(0) 1266; GFX9-NEXT: v_mov_b32_e32 v0, s24 1267; GFX9-NEXT: v_mov_b32_e32 v1, s25 1268; GFX9-NEXT: v_mov_b32_e32 v2, s26 1269; GFX9-NEXT: v_mov_b32_e32 v3, s27 1270; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1271; GFX9-NEXT: s_waitcnt vmcnt(0) 1272; GFX9-NEXT: v_mov_b32_e32 v0, s20 1273; GFX9-NEXT: v_mov_b32_e32 v1, s21 1274; GFX9-NEXT: v_mov_b32_e32 v2, s22 1275; GFX9-NEXT: v_mov_b32_e32 v3, s23 1276; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1277; GFX9-NEXT: s_waitcnt vmcnt(0) 1278; GFX9-NEXT: v_mov_b32_e32 v0, s16 1279; GFX9-NEXT: v_mov_b32_e32 v1, s17 1280; GFX9-NEXT: v_mov_b32_e32 v2, s18 1281; GFX9-NEXT: v_mov_b32_e32 v3, s19 1282; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1283; GFX9-NEXT: s_waitcnt vmcnt(0) 1284; GFX9-NEXT: v_and_b32_e32 v0, 1, v18 1285; GFX9-NEXT: global_store_byte v[0:1], v0, off 1286; 
GFX9-NEXT: s_waitcnt vmcnt(0) 1287; GFX9-NEXT: global_store_byte v[0:1], v19, off 1288; GFX9-NEXT: s_waitcnt vmcnt(0) 1289; GFX9-NEXT: global_store_short v[0:1], v20, off 1290; GFX9-NEXT: s_waitcnt vmcnt(0) 1291; GFX9-NEXT: global_store_short v[0:1], v21, off 1292; GFX9-NEXT: s_waitcnt vmcnt(0) 1293; GFX9-NEXT: s_setpc_b64 s[30:31] 1294; 1295; GFX11-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg: 1296; GFX11: ; %bb.0: 1297; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1298; GFX11-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_mov_b32 v20, v0 1299; GFX11-NEXT: v_dual_mov_b32 v18, s28 :: v_dual_mov_b32 v19, s29 1300; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc 1301; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1302; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc 1303; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1304; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off dlc 1305; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1306; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc 1307; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1308; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 1309; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 1310; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 1311; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 1312; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 1313; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 1314; GFX11-NEXT: v_dual_mov_b32 v18, s0 :: v_dual_mov_b32 v19, s1 1315; GFX11-NEXT: v_dual_mov_b32 v20, s2 :: v_dual_mov_b32 v21, s3 1316; GFX11-NEXT: v_and_b32_e32 v12, 1, v14 1317; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc 1318; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1319; GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc 1320; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1321; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc 1322; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1323; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc 1324; 
GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1325; GFX11-NEXT: global_store_b8 v[0:1], v12, off dlc 1326; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1327; GFX11-NEXT: global_store_b8 v[0:1], v15, off dlc 1328; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1329; GFX11-NEXT: global_store_b16 v[0:1], v16, off dlc 1330; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1331; GFX11-NEXT: global_store_b16 v[0:1], v17, off dlc 1332; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1333; GFX11-NEXT: s_setpc_b64 s[30:31] 1334 store volatile <32 x i32> %arg0, ptr addrspace(1) undef 1335 store volatile i1 %arg1, ptr addrspace(1) undef 1336 store volatile i8 %arg2, ptr addrspace(1) undef 1337 store volatile i16 %arg3, ptr addrspace(1) undef 1338 store volatile half %arg4, ptr addrspace(1) undef 1339 ret void 1340} 1341 1342define void @void_func_v32i32_v2i32_v2f32_inreg(<32 x i32> inreg %arg0, <2 x i32> inreg %arg1, <2 x float> inreg %arg2) #0 { 1343; GFX9-LABEL: void_func_v32i32_v2i32_v2f32_inreg: 1344; GFX9: ; %bb.0: 1345; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1346; GFX9-NEXT: v_mov_b32_e32 v25, v1 1347; GFX9-NEXT: v_mov_b32_e32 v24, v0 1348; GFX9-NEXT: v_mov_b32_e32 v22, s28 1349; GFX9-NEXT: v_mov_b32_e32 v23, s29 1350; GFX9-NEXT: global_store_dwordx4 v[0:1], v[14:17], off 1351; GFX9-NEXT: s_waitcnt vmcnt(0) 1352; GFX9-NEXT: global_store_dwordx4 v[0:1], v[10:13], off 1353; GFX9-NEXT: s_waitcnt vmcnt(0) 1354; GFX9-NEXT: global_store_dwordx4 v[0:1], v[6:9], off 1355; GFX9-NEXT: s_waitcnt vmcnt(0) 1356; GFX9-NEXT: global_store_dwordx4 v[0:1], v[2:5], off 1357; GFX9-NEXT: s_waitcnt vmcnt(0) 1358; GFX9-NEXT: global_store_dwordx4 v[0:1], v[22:25], off 1359; GFX9-NEXT: s_waitcnt vmcnt(0) 1360; GFX9-NEXT: v_mov_b32_e32 v0, s24 1361; GFX9-NEXT: v_mov_b32_e32 v1, s25 1362; GFX9-NEXT: v_mov_b32_e32 v2, s26 1363; GFX9-NEXT: v_mov_b32_e32 v3, s27 1364; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1365; GFX9-NEXT: s_waitcnt vmcnt(0) 1366; GFX9-NEXT: v_mov_b32_e32 v0, s20 1367; GFX9-NEXT: v_mov_b32_e32 v1, s21 
1368; GFX9-NEXT: v_mov_b32_e32 v2, s22 1369; GFX9-NEXT: v_mov_b32_e32 v3, s23 1370; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1371; GFX9-NEXT: s_waitcnt vmcnt(0) 1372; GFX9-NEXT: v_mov_b32_e32 v0, s16 1373; GFX9-NEXT: v_mov_b32_e32 v1, s17 1374; GFX9-NEXT: v_mov_b32_e32 v2, s18 1375; GFX9-NEXT: v_mov_b32_e32 v3, s19 1376; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off 1377; GFX9-NEXT: s_waitcnt vmcnt(0) 1378; GFX9-NEXT: global_store_dwordx2 v[0:1], v[18:19], off 1379; GFX9-NEXT: s_waitcnt vmcnt(0) 1380; GFX9-NEXT: global_store_dwordx2 v[0:1], v[20:21], off 1381; GFX9-NEXT: s_waitcnt vmcnt(0) 1382; GFX9-NEXT: s_setpc_b64 s[30:31] 1383; 1384; GFX11-LABEL: void_func_v32i32_v2i32_v2f32_inreg: 1385; GFX11: ; %bb.0: 1386; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1387; GFX11-NEXT: v_dual_mov_b32 v21, v1 :: v_dual_mov_b32 v20, v0 1388; GFX11-NEXT: global_store_b128 v[0:1], v[10:13], off dlc 1389; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1390; GFX11-NEXT: global_store_b128 v[0:1], v[6:9], off dlc 1391; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1392; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], off dlc 1393; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1394; GFX11-NEXT: v_dual_mov_b32 v18, s28 :: v_dual_mov_b32 v19, s29 1395; GFX11-NEXT: v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25 1396; GFX11-NEXT: v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27 1397; GFX11-NEXT: v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21 1398; GFX11-NEXT: v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23 1399; GFX11-NEXT: v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17 1400; GFX11-NEXT: v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19 1401; GFX11-NEXT: v_dual_mov_b32 v22, s0 :: v_dual_mov_b32 v23, s1 1402; GFX11-NEXT: v_dual_mov_b32 v24, s2 :: v_dual_mov_b32 v25, s3 1403; GFX11-NEXT: global_store_b128 v[0:1], v[18:21], off dlc 1404; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1405; GFX11-NEXT: global_store_b128 v[0:1], v[0:3], off dlc 1406; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1407; 
GFX11-NEXT: global_store_b128 v[0:1], v[4:7], off dlc 1408; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1409; GFX11-NEXT: global_store_b128 v[0:1], v[8:11], off dlc 1410; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1411; GFX11-NEXT: global_store_b128 v[0:1], v[22:25], off dlc 1412; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1413; GFX11-NEXT: global_store_b64 v[0:1], v[14:15], off dlc 1414; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1415; GFX11-NEXT: global_store_b64 v[0:1], v[16:17], off dlc 1416; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1417; GFX11-NEXT: s_setpc_b64 s[30:31] 1418 store volatile <32 x i32> %arg0, ptr addrspace(1) undef 1419 store volatile <2 x i32> %arg1, ptr addrspace(1) undef 1420 store volatile <2 x float> %arg2, ptr addrspace(1) undef 1421 ret void 1422} 1423 1424define void @too_many_args_use_workitem_id_x_inreg( 1425; GFX9-LABEL: too_many_args_use_workitem_id_x_inreg: 1426; GFX9: ; %bb.0: 1427; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1428; GFX9-NEXT: v_mov_b32_e32 v18, s16 1429; GFX9-NEXT: global_store_dword v[0:1], v18, off 1430; GFX9-NEXT: s_waitcnt vmcnt(0) 1431; GFX9-NEXT: v_mov_b32_e32 v18, s17 1432; GFX9-NEXT: global_store_dword v[0:1], v18, off 1433; GFX9-NEXT: s_waitcnt vmcnt(0) 1434; GFX9-NEXT: v_mov_b32_e32 v18, s18 1435; GFX9-NEXT: global_store_dword v[0:1], v18, off 1436; GFX9-NEXT: s_waitcnt vmcnt(0) 1437; GFX9-NEXT: v_mov_b32_e32 v18, s19 1438; GFX9-NEXT: global_store_dword v[0:1], v18, off 1439; GFX9-NEXT: s_waitcnt vmcnt(0) 1440; GFX9-NEXT: v_mov_b32_e32 v18, s20 1441; GFX9-NEXT: global_store_dword v[0:1], v18, off 1442; GFX9-NEXT: s_waitcnt vmcnt(0) 1443; GFX9-NEXT: v_mov_b32_e32 v18, s21 1444; GFX9-NEXT: global_store_dword v[0:1], v18, off 1445; GFX9-NEXT: s_waitcnt vmcnt(0) 1446; GFX9-NEXT: v_mov_b32_e32 v18, s22 1447; GFX9-NEXT: global_store_dword v[0:1], v18, off 1448; GFX9-NEXT: s_waitcnt vmcnt(0) 1449; GFX9-NEXT: v_mov_b32_e32 v18, s23 1450; GFX9-NEXT: global_store_dword v[0:1], v18, off 1451; GFX9-NEXT: s_waitcnt vmcnt(0) 1452; 
GFX9-NEXT: v_mov_b32_e32 v18, s24 1453; GFX9-NEXT: global_store_dword v[0:1], v18, off 1454; GFX9-NEXT: s_waitcnt vmcnt(0) 1455; GFX9-NEXT: v_mov_b32_e32 v18, s25 1456; GFX9-NEXT: global_store_dword v[0:1], v18, off 1457; GFX9-NEXT: s_waitcnt vmcnt(0) 1458; GFX9-NEXT: v_mov_b32_e32 v18, s26 1459; GFX9-NEXT: global_store_dword v[0:1], v18, off 1460; GFX9-NEXT: s_waitcnt vmcnt(0) 1461; GFX9-NEXT: v_mov_b32_e32 v18, s27 1462; GFX9-NEXT: global_store_dword v[0:1], v18, off 1463; GFX9-NEXT: s_waitcnt vmcnt(0) 1464; GFX9-NEXT: v_mov_b32_e32 v18, s28 1465; GFX9-NEXT: global_store_dword v[0:1], v18, off 1466; GFX9-NEXT: s_waitcnt vmcnt(0) 1467; GFX9-NEXT: v_mov_b32_e32 v18, s29 1468; GFX9-NEXT: global_store_dword v[0:1], v18, off 1469; GFX9-NEXT: s_waitcnt vmcnt(0) 1470; GFX9-NEXT: global_store_dword v[0:1], v0, off 1471; GFX9-NEXT: s_waitcnt vmcnt(0) 1472; GFX9-NEXT: global_store_dword v[0:1], v1, off 1473; GFX9-NEXT: s_waitcnt vmcnt(0) 1474; GFX9-NEXT: global_store_dword v[0:1], v2, off 1475; GFX9-NEXT: s_waitcnt vmcnt(0) 1476; GFX9-NEXT: global_store_dword v[0:1], v3, off 1477; GFX9-NEXT: s_waitcnt vmcnt(0) 1478; GFX9-NEXT: global_store_dword v[0:1], v4, off 1479; GFX9-NEXT: s_waitcnt vmcnt(0) 1480; GFX9-NEXT: global_store_dword v[0:1], v5, off 1481; GFX9-NEXT: s_waitcnt vmcnt(0) 1482; GFX9-NEXT: global_store_dword v[0:1], v6, off 1483; GFX9-NEXT: s_waitcnt vmcnt(0) 1484; GFX9-NEXT: global_store_dword v[0:1], v7, off 1485; GFX9-NEXT: s_waitcnt vmcnt(0) 1486; GFX9-NEXT: global_store_dword v[0:1], v8, off 1487; GFX9-NEXT: s_waitcnt vmcnt(0) 1488; GFX9-NEXT: global_store_dword v[0:1], v9, off 1489; GFX9-NEXT: s_waitcnt vmcnt(0) 1490; GFX9-NEXT: global_store_dword v[0:1], v10, off 1491; GFX9-NEXT: s_waitcnt vmcnt(0) 1492; GFX9-NEXT: global_store_dword v[0:1], v11, off 1493; GFX9-NEXT: s_waitcnt vmcnt(0) 1494; GFX9-NEXT: global_store_dword v[0:1], v12, off 1495; GFX9-NEXT: s_waitcnt vmcnt(0) 1496; GFX9-NEXT: global_store_dword v[0:1], v13, off 1497; GFX9-NEXT: s_waitcnt 
vmcnt(0) 1498; GFX9-NEXT: global_store_dword v[0:1], v14, off 1499; GFX9-NEXT: s_waitcnt vmcnt(0) 1500; GFX9-NEXT: global_store_dword v[0:1], v15, off 1501; GFX9-NEXT: s_waitcnt vmcnt(0) 1502; GFX9-NEXT: global_store_dword v[0:1], v16, off 1503; GFX9-NEXT: s_waitcnt vmcnt(0) 1504; GFX9-NEXT: global_store_dword v[0:1], v17, off 1505; GFX9-NEXT: s_waitcnt vmcnt(0) 1506; GFX9-NEXT: s_setpc_b64 s[30:31] 1507; 1508; GFX11-LABEL: too_many_args_use_workitem_id_x_inreg: 1509; GFX11: ; %bb.0: 1510; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1511; GFX11-NEXT: v_dual_mov_b32 v14, s0 :: v_dual_mov_b32 v15, s1 1512; GFX11-NEXT: v_mov_b32_e32 v16, s2 1513; GFX11-NEXT: v_mov_b32_e32 v18, s19 1514; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc 1515; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1516; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc 1517; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1518; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc 1519; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1520; GFX11-NEXT: v_dual_mov_b32 v14, s3 :: v_dual_mov_b32 v17, s18 1521; GFX11-NEXT: v_dual_mov_b32 v15, s16 :: v_dual_mov_b32 v16, s17 1522; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc 1523; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1524; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc 1525; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1526; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc 1527; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1528; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc 1529; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1530; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc 1531; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1532; GFX11-NEXT: v_dual_mov_b32 v15, s21 :: v_dual_mov_b32 v14, s20 1533; GFX11-NEXT: v_dual_mov_b32 v16, s22 :: v_dual_mov_b32 v17, s23 1534; GFX11-NEXT: v_mov_b32_e32 v18, s24 1535; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc 1536; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1537; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc 1538; GFX11-NEXT: 
s_waitcnt_vscnt null, 0x0 1539; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc 1540; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1541; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc 1542; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1543; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc 1544; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1545; GFX11-NEXT: v_dual_mov_b32 v17, s28 :: v_dual_mov_b32 v14, s25 1546; GFX11-NEXT: v_dual_mov_b32 v15, s26 :: v_dual_mov_b32 v16, s27 1547; GFX11-NEXT: v_mov_b32_e32 v18, s29 1548; GFX11-NEXT: global_store_b32 v[0:1], v14, off dlc 1549; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1550; GFX11-NEXT: global_store_b32 v[0:1], v15, off dlc 1551; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1552; GFX11-NEXT: global_store_b32 v[0:1], v16, off dlc 1553; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1554; GFX11-NEXT: global_store_b32 v[0:1], v17, off dlc 1555; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1556; GFX11-NEXT: global_store_b32 v[0:1], v18, off dlc 1557; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1558; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc 1559; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1560; GFX11-NEXT: global_store_b32 v[0:1], v1, off dlc 1561; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1562; GFX11-NEXT: global_store_b32 v[0:1], v2, off dlc 1563; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1564; GFX11-NEXT: global_store_b32 v[0:1], v3, off dlc 1565; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1566; GFX11-NEXT: global_store_b32 v[0:1], v4, off dlc 1567; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1568; GFX11-NEXT: global_store_b32 v[0:1], v5, off dlc 1569; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1570; GFX11-NEXT: global_store_b32 v[0:1], v6, off dlc 1571; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1572; GFX11-NEXT: global_store_b32 v[0:1], v7, off dlc 1573; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1574; GFX11-NEXT: global_store_b32 v[0:1], v8, off dlc 1575; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1576; GFX11-NEXT: global_store_b32 v[0:1], v9, off dlc 1577; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 
1578; GFX11-NEXT: global_store_b32 v[0:1], v10, off dlc 1579; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1580; GFX11-NEXT: global_store_b32 v[0:1], v11, off dlc 1581; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1582; GFX11-NEXT: global_store_b32 v[0:1], v12, off dlc 1583; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1584; GFX11-NEXT: global_store_b32 v[0:1], v13, off dlc 1585; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1586; GFX11-NEXT: s_setpc_b64 s[30:31] 1587 i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 inreg %arg7, 1588 i32 inreg %arg8, i32 inreg %arg9, i32 inreg %arg10, i32 inreg %arg11, i32 inreg %arg12, i32 inreg %arg13, i32 inreg %arg14, i32 inreg %arg15, 1589 i32 inreg %arg16, i32 inreg %arg17, i32 inreg %arg18, i32 inreg %arg19, i32 inreg %arg20, i32 inreg %arg21, i32 inreg %arg22, i32 inreg %arg23, 1590 i32 inreg %arg24, i32 inreg %arg25, i32 inreg %arg26, i32 inreg %arg27, i32 inreg %arg28, i32 inreg %arg29, i32 inreg %arg30, i32 inreg %arg31) { 1591 ;%val = call i32 @llvm.amdgcn.workitem.id.x() 1592 ;store volatile i32 %val, ptr addrspace(1) undef 1593 1594 store volatile i32 %arg0, ptr addrspace(1) undef 1595 store volatile i32 %arg1, ptr addrspace(1) undef 1596 store volatile i32 %arg2, ptr addrspace(1) undef 1597 store volatile i32 %arg3, ptr addrspace(1) undef 1598 store volatile i32 %arg4, ptr addrspace(1) undef 1599 store volatile i32 %arg5, ptr addrspace(1) undef 1600 store volatile i32 %arg6, ptr addrspace(1) undef 1601 store volatile i32 %arg7, ptr addrspace(1) undef 1602 1603 store volatile i32 %arg8, ptr addrspace(1) undef 1604 store volatile i32 %arg9, ptr addrspace(1) undef 1605 store volatile i32 %arg10, ptr addrspace(1) undef 1606 store volatile i32 %arg11, ptr addrspace(1) undef 1607 store volatile i32 %arg12, ptr addrspace(1) undef 1608 store volatile i32 %arg13, ptr addrspace(1) undef 1609 store volatile i32 %arg14, ptr addrspace(1) undef 1610 store volatile i32 %arg15, ptr 
addrspace(1) undef 1611 1612 store volatile i32 %arg16, ptr addrspace(1) undef 1613 store volatile i32 %arg17, ptr addrspace(1) undef 1614 store volatile i32 %arg18, ptr addrspace(1) undef 1615 store volatile i32 %arg19, ptr addrspace(1) undef 1616 store volatile i32 %arg20, ptr addrspace(1) undef 1617 store volatile i32 %arg21, ptr addrspace(1) undef 1618 store volatile i32 %arg22, ptr addrspace(1) undef 1619 store volatile i32 %arg23, ptr addrspace(1) undef 1620 1621 store volatile i32 %arg24, ptr addrspace(1) undef 1622 store volatile i32 %arg25, ptr addrspace(1) undef 1623 store volatile i32 %arg26, ptr addrspace(1) undef 1624 store volatile i32 %arg27, ptr addrspace(1) undef 1625 store volatile i32 %arg28, ptr addrspace(1) undef 1626 store volatile i32 %arg29, ptr addrspace(1) undef 1627 store volatile i32 %arg30, ptr addrspace(1) undef 1628 store volatile i32 %arg31, ptr addrspace(1) undef 1629 1630 ret void 1631} 1632 1633define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 { 1634; GFX9-LABEL: void_func_i32_v2float_inreg: 1635; GFX9: ; %bb.0: 1636; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1637; GFX9-NEXT: v_mov_b32_e32 v0, s16 1638; GFX9-NEXT: global_store_dword v[0:1], v0, off 1639; GFX9-NEXT: v_mov_b32_e32 v0, s17 1640; GFX9-NEXT: v_mov_b32_e32 v1, s18 1641; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off 1642; GFX9-NEXT: s_waitcnt vmcnt(0) 1643; GFX9-NEXT: s_setpc_b64 s[30:31] 1644; 1645; GFX11-LABEL: void_func_i32_v2float_inreg: 1646; GFX11: ; %bb.0: 1647; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1648; GFX11-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2 1649; GFX11-NEXT: v_mov_b32_e32 v0, s1 1650; GFX11-NEXT: s_clause 0x1 1651; GFX11-NEXT: global_store_b32 v[0:1], v2, off 1652; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off 1653; GFX11-NEXT: s_setpc_b64 s[30:31] 1654 store i32 %arg0, ptr addrspace(1) undef 1655 store <2 x float> %arg1, ptr addrspace(1) undef 1656 ret void 1657} 1658 
; Forwards its own inreg arguments (i32 + <2 x float>) to a callee that takes
; the same inreg signature.
; NOTE(review): the callee is @caller_void_func_i32_v2float_inreg itself, so
; this function is directly recursive; @void_func_i32_v2float_inreg may have
; been the intended target. Confirm before changing it - the assertions would
; have to be regenerated.
define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
; GFX9-LABEL: caller_void_func_i32_v2float_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s19, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    s_getpc_b64 s[20:21]
; GFX9-NEXT:    s_add_u32 s20, s20, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s21, s21, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[20:21], s[20:21], 0x0
; GFX9-NEXT:    v_writelane_b32 v40, s19, 2
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s2, s18
; GFX9-NEXT:    s_mov_b32 s1, s17
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[20:21]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: caller_void_func_i32_v2float_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s16, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s16
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[16:17]
; GFX11-NEXT:    s_add_u32 s16, s16, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s17, s17, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s3, 2
; GFX11-NEXT:    s_load_b64 s[16:17], s[16:17], 0x0
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1)
  ret void
}

; Scalar bfloat inreg: read from a single SGPR and written with a 16-bit store.
define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 {
; GFX9-LABEL: void_func_bf16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_short v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_bf16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store bfloat %arg0, ptr addrspace(1) undef
  ret void
}

; <2 x bfloat> inreg: one SGPR, one 32-bit store.
define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v2bf16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v2bf16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, s0
; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <2 x bfloat> %arg0, ptr addrspace(1) undef
  ret void
}

; <3 x bfloat> inreg: two SGPRs, written as a 16-bit store plus a 32-bit store.
define void @void_func_v3bf16_inreg(<3 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v3bf16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s17
; GFX9-NEXT:    global_store_short v[0:1], v0, off
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v3bf16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
; GFX11-NEXT:    global_store_b32 v[0:1], v1, off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <3 x bfloat> %arg0, ptr addrspace(1) undef
  ret void
}

; <4 x bfloat> inreg: two SGPRs, one 64-bit store.
define void @void_func_v4bf16_inreg(<4 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v4bf16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v4bf16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <4 x bfloat> %arg0, ptr addrspace(1) undef
  ret void
}

; <8 x bfloat> inreg: four SGPRs, one 128-bit store.
define void @void_func_v8bf16_inreg(<8 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v8bf16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v8bf16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <8 x bfloat> %arg0, ptr addrspace(1) undef
  ret void
}

; <16 x bfloat> inreg: eight SGPRs, two 128-bit stores. On gfx1100 the values
; are read from s0-s3 and s16-s19.
define void @void_func_v16bf16_inreg(<16 x bfloat> inreg %arg0) #0 {
; GFX9-LABEL: void_func_v16bf16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, s20
; GFX9-NEXT:    v_mov_b32_e32 v1, s21
; GFX9-NEXT:    v_mov_b32_e32 v2, s22
; GFX9-NEXT:    v_mov_b32_e32 v3, s23
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v0, s16
; GFX9-NEXT:    v_mov_b32_e32 v1, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_v16bf16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store <16 x bfloat> %arg0, ptr addrspace(1) undef
  ret void
}

; Two inreg i32 values followed by a non-inreg pointer; the pointer is used
; from v[0:1] and each value is written with its own volatile store.
define void @void_func_2_i32_inreg(i32 inreg %arg0, i32 inreg %arg1, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_2_i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s16
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s17
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_2_i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store volatile i32 %arg0, ptr addrspace(1) %ptr
  store volatile i32 %arg1, ptr addrspace(1) %ptr
  ret void
}

; Two inreg i64 values (an SGPR pair each) followed by a non-inreg pointer.
define void @void_func_2_i64_inreg(i64 inreg %arg0, i64 inreg %arg1, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_2_i64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v4, s16
; GFX9-NEXT:    v_mov_b32_e32 v5, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s19
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_2_i64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT:    global_store_b64 v[0:1], v[4:5], off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store volatile i64 %arg0, ptr addrspace(1) %ptr
  store volatile i64 %arg1, ptr addrspace(1) %ptr
  ret void
}

; Mixed-width inreg arguments (i64, i32, i64) followed by a non-inreg pointer.
; On gfx1100 the second i64 is read from s3 and s16.
define void @void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_i64_inreg_i32_inreg_i64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v4, s16
; GFX9-NEXT:    v_mov_b32_e32 v5, s17
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v4, s18
; GFX9-NEXT:    v_mov_b32_e32 v2, s19
; GFX9-NEXT:    v_mov_b32_e32 v3, s20
; GFX9-NEXT:    global_store_dword v[0:1], v4, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_i64_inreg_i32_inreg_i64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v5, s1
; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v3, s16
; GFX11-NEXT:    v_mov_b32_e32 v6, s2
; GFX11-NEXT:    global_store_b64 v[0:1], v[4:5], off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b32 v[0:1], v6, off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store volatile i64 %arg0, ptr addrspace(1) %ptr
  store volatile i32 %arg1, ptr addrspace(1) %ptr
  store volatile i64 %arg2, ptr addrspace(1) %ptr
  ret void
}

; Five separate inreg i32 arguments followed by a non-inreg pointer; each
; value is written with its own volatile store.
define void @void_func_5_i32_inreg(i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_5_i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s16
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s17
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s18
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s19
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s20
; GFX9-NEXT:    global_store_dword v[0:1], v2, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_5_i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
; GFX11-NEXT:    v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
; GFX11-NEXT:    v_mov_b32_e32 v6, s16
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b32 v[0:1], v3, off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b32 v[0:1], v4, off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b32 v[0:1], v5, off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    global_store_b32 v[0:1], v6, off dlc
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store volatile i32 %arg0, ptr addrspace(1) %ptr
  store volatile i32 %arg1, ptr addrspace(1) %ptr
  store volatile i32 %arg2, ptr addrspace(1) %ptr
  store volatile i32 %arg3, ptr addrspace(1) %ptr
  store volatile i32 %arg4, ptr addrspace(1) %ptr
  ret void
}

; The same five i32 values passed as a single inreg array; the aggregate store
; becomes one 128-bit store plus one 32-bit store at offset 16.
define void @void_func_a5i32_inreg([5 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_a5i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v2, s20
; GFX9-NEXT:    global_store_dword v[0:1], v2, off offset:16
; GFX9-NEXT:    v_mov_b32_e32 v5, s19
; GFX9-NEXT:    v_mov_b32_e32 v4, s18
; GFX9-NEXT:    v_mov_b32_e32 v3, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s16
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_a5i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v5, s3
; GFX11-NEXT:    v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v3, s1
; GFX11-NEXT:    v_mov_b32_e32 v2, s0
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    global_store_b32 v[0:1], v6, off offset:16
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store [5 x i32] %arg0, ptr addrspace(1) %ptr
  ret void
}

; Force all implicit inputs to be required
declare void @extern()

; Thirteen inreg i32 values plus a call to @extern. On gfx900 the values are
; read from s16-s28 and s29 holds the saved s33 across the prologue.
define void @void_func_a13i32_inreg([13 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_a13i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s29, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 vcc, -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, vcc
; GFX9-NEXT:    v_mov_b32_e32 v2, s28
; GFX9-NEXT:    global_store_dword v[0:1], v2, off offset:48
; GFX9-NEXT:    v_mov_b32_e32 v5, s27
; GFX9-NEXT:    v_mov_b32_e32 v4, s26
; GFX9-NEXT:    v_mov_b32_e32 v3, s25
; GFX9-NEXT:    v_mov_b32_e32 v2, s24
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:32
; GFX9-NEXT:    v_writelane_b32 v40, s29, 2
; GFX9-NEXT:    v_mov_b32_e32 v5, s23
; GFX9-NEXT:    v_mov_b32_e32 v4, s22
; GFX9-NEXT:    v_mov_b32_e32 v3, s21
; GFX9-NEXT:    v_mov_b32_e32 v2, s20
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:16
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    v_mov_b32_e32 v3, s17
; GFX9-NEXT:    v_mov_b32_e32 v2, s16
; GFX9-NEXT:    s_getpc_b64 s[16:17]
; GFX9-NEXT:    s_add_u32 s16, s16, extern@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s17, s17, extern@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[16:17], 0x0
; GFX9-NEXT:    v_mov_b32_e32 v5, s19
; GFX9-NEXT:    v_mov_b32_e32 v4, s18
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_a13i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s25, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s26, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s26
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v3, s21
; GFX11-NEXT:    v_dual_mov_b32 v2, s20 :: v_dual_mov_b32 v9, s19
; GFX11-NEXT:    s_getpc_b64 s[20:21]
; GFX11-NEXT:    s_add_u32 s20, s20, extern@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s21, s21, extern@gotpcrel32@hi+12
; GFX11-NEXT:    v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v7, s17
; GFX11-NEXT:    v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v13, s3
; GFX11-NEXT:    s_load_b64 s[16:17], s[20:21], 0x0
; GFX11-NEXT:    v_writelane_b32 v40, s25, 2
; GFX11-NEXT:    v_dual_mov_b32 v14, s24 :: v_dual_mov_b32 v5, s23
; GFX11-NEXT:    v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_mov_b32_e32 v10, s0
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    global_store_b32 v[0:1], v14, off offset:48
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off offset:32
; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off offset:16
; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store [13 x i32] %arg0, ptr addrspace(1) %ptr
  call void @extern()
  ret void
}

; FIXME: disabled, like the a15/a16 variants below; the reason is not recorded
; in this file.
; define void @void_func_a14i32_inreg([14 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
;   store [14 x i32] %arg0, ptr addrspace(1) %ptr
;   call void @extern()
;   ret void
; }

; FIXME:
; define void @void_func_a15i32_inreg([15 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
;   store [15 x i32] %arg0, ptr addrspace(1) %ptr
;   call void @extern()
;   ret void
; }

; FIXME:
; define void @void_func_a16i32_inreg([16 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
;   store [16 x i32] %arg0, ptr addrspace(1) %ptr
;   call void @extern()
;   ret void
; }

; Sixteen inreg i32 values with no call in the body. In the gfx900 output the
; first fourteen are read from s16-s29, the last two from v0/v1, and the
; pointer from v[2:3].
; FIXME: Should still fail
define void @void_func_a16i32_inreg__noimplicit([16 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
; GFX9-LABEL: void_func_a16i32_inreg__noimplicit:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v7, v1
; GFX9-NEXT:    v_mov_b32_e32 v6, v0
; GFX9-NEXT:    v_mov_b32_e32 v5, s29
; GFX9-NEXT:    v_mov_b32_e32 v4, s28
; GFX9-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off offset:48
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v7, s27
; GFX9-NEXT:    v_mov_b32_e32 v6, s26
; GFX9-NEXT:    v_mov_b32_e32 v5, s25
; GFX9-NEXT:    v_mov_b32_e32 v4, s24
; GFX9-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off offset:32
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v7, s23
; GFX9-NEXT:    v_mov_b32_e32 v6, s22
; GFX9-NEXT:    v_mov_b32_e32 v5, s21
; GFX9-NEXT:    v_mov_b32_e32 v4, s20
; GFX9-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off offset:16
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    v_mov_b32_e32 v7, s19
; GFX9-NEXT:    v_mov_b32_e32 v6, s18
; GFX9-NEXT:    v_mov_b32_e32 v5, s17
; GFX9-NEXT:    v_mov_b32_e32 v4, s16
; GFX9-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: void_func_a16i32_inreg__noimplicit:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v5, s27 :: v_dual_mov_b32 v4, s26
; GFX11-NEXT:    v_dual_mov_b32 v3, s25 :: v_dual_mov_b32 v2, s24
; GFX11-NEXT:    v_dual_mov_b32 v9, s23 :: v_dual_mov_b32 v8, s22
; GFX11-NEXT:    v_dual_mov_b32 v7, s21 :: v_dual_mov_b32 v6, s20
; GFX11-NEXT:    v_dual_mov_b32 v13, s19 :: v_dual_mov_b32 v12, s18
; GFX11-NEXT:    v_dual_mov_b32 v11, s17 :: v_dual_mov_b32 v10, s16
; GFX11-NEXT:    v_dual_mov_b32 v17, s3 :: v_dual_mov_b32 v16, s2
; GFX11-NEXT:    v_dual_mov_b32 v15, s1 :: v_dual_mov_b32 v14, s0
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off offset:48
; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off offset:32
; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off offset:16
; GFX11-NEXT:    global_store_b128 v[0:1], v[14:17], off
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  store [16 x i32] %arg0, ptr addrspace(1) %ptr
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind noinline }