; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s

; Call lowering of `inreg` arguments, checked for gfx900 and gfx1100.
;
; Every test function below receives a single `inreg` argument and passes it,
; unchanged, as an `inreg` argument to the hidden external callee of the same
; type. The expected gfx900 output copies the incoming SGPRs (starting at s16)
; down to s0 and up before the call; the expected gfx1100 output contains no
; such copies.
;
; The assertions are autogenerated: regenerate them with the script named on
; the first line instead of editing them by hand.

; External callees. Each parameter type matches the type used at the call site.
declare hidden void @external_void_func_i8_inreg(i8 inreg) #0
declare hidden void @external_void_func_i16_inreg(i16 inreg) #0
declare hidden void @external_void_func_i32_inreg(i32 inreg) #0
declare hidden void @external_void_func_i64_inreg(i64 inreg) #0
declare hidden void @external_void_func_v2i32_inreg(<2 x i32> inreg) #0
declare hidden void @external_void_func_v3i32_inreg(<3 x i32> inreg) #0
declare hidden void @external_void_func_v4i32_inreg(<4 x i32> inreg) #0
declare hidden void @external_void_func_v8i32_inreg(<8 x i32> inreg) #0
declare hidden void @external_void_func_v16i32_inreg(<16 x i32> inreg) #0
declare hidden void @external_void_func_f16_inreg(half inreg) #0
declare hidden void @external_void_func_bf16_inreg(bfloat inreg) #0
declare hidden void @external_void_func_f32_inreg(float inreg) #0
declare hidden void @external_void_func_f64_inreg(double inreg) #0
declare hidden void @external_void_func_v2f16_inreg(<2 x half> inreg) #0
declare hidden void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg) #0
declare hidden void @external_void_func_v3f16_inreg(<3 x half> inreg) #0
declare hidden void @external_void_func_v4f16_inreg(<4 x half> inreg) #0

declare hidden void @external_void_func_p0_inreg(ptr inreg) #0
declare hidden void @external_void_func_p1_inreg(ptr addrspace(1) inreg) #0
declare hidden void @external_void_func_p3_inreg(ptr addrspace(3) inreg) #0
declare hidden void @external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg) #0
declare hidden void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg) #0

declare hidden void @external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg, i32 inreg, i64 inreg) #0

; Review note - the two names below say "a15i32" although the array type has
; 13 elements; the names are kept because their users may be outside this view.
declare hidden void @external_void_func_a15i32_inreg([13 x i32] inreg) #0
declare hidden void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([13 x i32] inreg, i32 inreg) #1

; Forward an `i8 inreg` argument to the external callee.
define void @test_call_external_void_func_i8_inreg(i8 inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_i8_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s17, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[18:19]
; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_i8_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_i8_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_i8_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s1, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s2, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s2
; GFX11-NEXT:    v_writelane_b32 v40, s1, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[2:3]
; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i8_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i8_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_i8_inreg(i8 inreg %arg)
  ret void
}

; Forward an `i16 inreg` argument to the external callee.
define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_i16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s17, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[18:19]
; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_i16_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_i16_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_i16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s1, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s2, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s2
; GFX11-NEXT:    v_writelane_b32 v40, s1, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[2:3]
; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i16_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i16_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_i16_inreg(i16 inreg %arg)
  ret void
}

; Forward an `i32 inreg` argument to the external callee.
define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s17, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[18:19]
; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_i32_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_i32_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s1, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s2, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s2
; GFX11-NEXT:    v_writelane_b32 v40, s1, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[2:3]
; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i32_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i32_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_i32_inreg(i32 inreg %arg)
  ret void
}

; Forward an `i64 inreg` argument to the external callee.
define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_i64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s18, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
; GFX9-NEXT:    v_writelane_b32 v40, s18, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s1, s17
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[18:19]
; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_i64_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_i64_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_i64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s3, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s3
; GFX11-NEXT:    v_writelane_b32 v40, s2, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[2:3]
; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i64_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i64_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_i64_inreg(i64 inreg %arg)
  ret void
}

; Forward a `<2 x i32> inreg` argument to the external callee.
define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_v2i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s18, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
; GFX9-NEXT:    v_writelane_b32 v40, s18, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s1, s17
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[18:19]
; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_v2i32_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_v2i32_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_v2i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s3, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s3
; GFX11-NEXT:    v_writelane_b32 v40, s2, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[2:3]
; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2i32_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2i32_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_v2i32_inreg(<2 x i32> inreg %arg)
  ret void
}

; Forward a `<3 x i32> inreg` argument to the external callee.
define void @test_call_external_void_func_v3i32_inreg(<3 x i32> inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_v3i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s19, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
; GFX9-NEXT:    v_writelane_b32 v40, s19, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s2, s18
; GFX9-NEXT:    s_mov_b32 s1, s17
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[20:21]
; GFX9-NEXT:    s_add_u32 s20, s20, external_void_func_v3i32_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s21, s21, external_void_func_v3i32_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[20:21]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_v3i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s3, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s16, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s16
; GFX11-NEXT:    v_writelane_b32 v40, s3, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[16:17]
; GFX11-NEXT:    s_add_u32 s16, s16, external_void_func_v3i32_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s17, s17, external_void_func_v3i32_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_v3i32_inreg(<3 x i32> inreg %arg)
  ret void
}

; Forward a `<4 x i32> inreg` argument to the external callee.
define void @test_call_external_void_func_v4i32_inreg(<4 x i32> inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_v4i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s20, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[22:23], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[22:23]
; GFX9-NEXT:    v_writelane_b32 v40, s20, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s3, s19
; GFX9-NEXT:    s_mov_b32 s2, s18
; GFX9-NEXT:    s_mov_b32 s1, s17
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[20:21]
; GFX9-NEXT:    s_add_u32 s20, s20, external_void_func_v4i32_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s21, s21, external_void_func_v4i32_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[20:21]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_v4i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s16, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s17, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s17
; GFX11-NEXT:    v_writelane_b32 v40, s16, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[16:17]
; GFX11-NEXT:    s_add_u32 s16, s16, external_void_func_v4i32_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s17, s17, external_void_func_v4i32_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[16:17]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_v4i32_inreg(<4 x i32> inreg %arg)
  ret void
}

; Forward a `<8 x i32> inreg` argument to the external callee.
define void @test_call_external_void_func_v8i32_inreg(<8 x i32> inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_v8i32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s24, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[26:27], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[26:27]
; GFX9-NEXT:    v_writelane_b32 v40, s24, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s3, s19
; GFX9-NEXT:    s_mov_b32 s2, s18
; GFX9-NEXT:    s_mov_b32 s1, s17
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    s_mov_b32 s16, s20
; GFX9-NEXT:    s_mov_b32 s17, s21
; GFX9-NEXT:    s_mov_b32 s18, s22
; GFX9-NEXT:    s_mov_b32 s19, s23
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[24:25]
; GFX9-NEXT:    s_add_u32 s24, s24, external_void_func_v8i32_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s25, s25, external_void_func_v8i32_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[24:25]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_v8i32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s20, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s21, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s21
; GFX11-NEXT:    v_writelane_b32 v40, s20, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[20:21]
; GFX11-NEXT:    s_add_u32 s20, s20, external_void_func_v8i32_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s21, s21, external_void_func_v8i32_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[20:21]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_v8i32_inreg(<8 x i32> inreg %arg)
  ret void
}

; Forward a `half inreg` argument to the external callee.
define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_f16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s17, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[18:19]
; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_f16_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_f16_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_f16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s1, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s2, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s2
; GFX11-NEXT:    v_writelane_b32 v40, s1, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[2:3]
; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_f16_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_f16_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_f16_inreg(half inreg %arg)
  ret void
}

; Forward a `bfloat inreg` argument to the external callee.
define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_bf16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s17, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[18:19]
; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_bf16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s1, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s2, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s2
; GFX11-NEXT:    v_writelane_b32 v40, s1, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[2:3]
; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_bf16_inreg(bfloat inreg %arg)
  ret void
}

; Forward a `float inreg` argument to the external callee.
define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_f32_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s17, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[18:19]
; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_f32_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_f32_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_f32_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s1, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s2, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s2
; GFX11-NEXT:    v_writelane_b32 v40, s1, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[2:3]
; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_f32_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_f32_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_f32_inreg(float inreg %arg)
  ret void
}

; Forward a `double inreg` argument to the external callee.
define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_f64_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s18, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
; GFX9-NEXT:    v_writelane_b32 v40, s18, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s1, s17
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[18:19]
; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_f64_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_f64_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_f64_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s3, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s3
; GFX11-NEXT:    v_writelane_b32 v40, s2, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[2:3]
; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_f64_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_f64_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_f64_inreg(double inreg %arg)
  ret void
}

; Forward a `<2 x half> inreg` argument to the external callee.
define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_v2f16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s17, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
; GFX9-NEXT:    v_writelane_b32 v40, s17, 2
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_mov_b32 s0, s16
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_getpc_b64 s[18:19]
; GFX9-NEXT:    s_add_u32 s18, s18, external_void_func_v2f16_inreg@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s19, s19, external_void_func_v2f16_inreg@rel32@hi+12
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[18:19]
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: test_call_external_void_func_v2f16_inreg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s1, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s2, -1
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s2
; GFX11-NEXT:    v_writelane_b32 v40, s1, 2
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[2:3]
; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2f16_inreg@rel32@lo+4
; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2f16_inreg@rel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  call void @external_void_func_v2f16_inreg(<2 x half> inreg %arg)
  ret void
}


define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) #0 {
; GFX9-LABEL: test_call_external_void_func_v2bf16_inreg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s17, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
839; GFX9-NEXT: v_writelane_b32 v40, s17, 2 840; GFX9-NEXT: s_addk_i32 s32, 0x400 841; GFX9-NEXT: v_writelane_b32 v40, s30, 0 842; GFX9-NEXT: s_mov_b32 s0, s16 843; GFX9-NEXT: v_writelane_b32 v40, s31, 1 844; GFX9-NEXT: s_getpc_b64 s[18:19] 845; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4 846; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12 847; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] 848; GFX9-NEXT: v_readlane_b32 s31, v40, 1 849; GFX9-NEXT: v_readlane_b32 s30, v40, 0 850; GFX9-NEXT: s_mov_b32 s32, s33 851; GFX9-NEXT: v_readlane_b32 s4, v40, 2 852; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 853; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 854; GFX9-NEXT: s_mov_b64 exec, s[6:7] 855; GFX9-NEXT: s_mov_b32 s33, s4 856; GFX9-NEXT: s_waitcnt vmcnt(0) 857; GFX9-NEXT: s_setpc_b64 s[30:31] 858; 859; GFX11-LABEL: test_call_external_void_func_v2bf16_inreg: 860; GFX11: ; %bb.0: 861; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 862; GFX11-NEXT: s_mov_b32 s1, s33 863; GFX11-NEXT: s_mov_b32 s33, s32 864; GFX11-NEXT: s_or_saveexec_b32 s2, -1 865; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 866; GFX11-NEXT: s_mov_b32 exec_lo, s2 867; GFX11-NEXT: v_writelane_b32 v40, s1, 2 868; GFX11-NEXT: s_add_i32 s32, s32, 16 869; GFX11-NEXT: s_getpc_b64 s[2:3] 870; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4 871; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12 872; GFX11-NEXT: v_writelane_b32 v40, s30, 0 873; GFX11-NEXT: v_writelane_b32 v40, s31, 1 874; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 875; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 876; GFX11-NEXT: v_readlane_b32 s31, v40, 1 877; GFX11-NEXT: v_readlane_b32 s30, v40, 0 878; GFX11-NEXT: s_mov_b32 s32, s33 879; GFX11-NEXT: v_readlane_b32 s0, v40, 2 880; GFX11-NEXT: s_or_saveexec_b32 s1, -1 881; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 882; 
GFX11-NEXT: s_mov_b32 exec_lo, s1 883; GFX11-NEXT: s_mov_b32 s33, s0 884; GFX11-NEXT: s_waitcnt vmcnt(0) 885; GFX11-NEXT: s_setpc_b64 s[30:31] 886 call void @external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg) 887 ret void 888} 889 890define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 { 891; GFX9-LABEL: test_call_external_void_func_v3f16_inreg: 892; GFX9: ; %bb.0: 893; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 894; GFX9-NEXT: s_mov_b32 s18, s33 895; GFX9-NEXT: s_mov_b32 s33, s32 896; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 897; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 898; GFX9-NEXT: s_mov_b64 exec, s[20:21] 899; GFX9-NEXT: v_writelane_b32 v40, s18, 2 900; GFX9-NEXT: s_addk_i32 s32, 0x400 901; GFX9-NEXT: v_writelane_b32 v40, s30, 0 902; GFX9-NEXT: s_mov_b32 s1, s17 903; GFX9-NEXT: s_mov_b32 s0, s16 904; GFX9-NEXT: v_writelane_b32 v40, s31, 1 905; GFX9-NEXT: s_getpc_b64 s[18:19] 906; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v3f16_inreg@rel32@lo+4 907; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v3f16_inreg@rel32@hi+12 908; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] 909; GFX9-NEXT: v_readlane_b32 s31, v40, 1 910; GFX9-NEXT: v_readlane_b32 s30, v40, 0 911; GFX9-NEXT: s_mov_b32 s32, s33 912; GFX9-NEXT: v_readlane_b32 s4, v40, 2 913; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 914; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 915; GFX9-NEXT: s_mov_b64 exec, s[6:7] 916; GFX9-NEXT: s_mov_b32 s33, s4 917; GFX9-NEXT: s_waitcnt vmcnt(0) 918; GFX9-NEXT: s_setpc_b64 s[30:31] 919; 920; GFX11-LABEL: test_call_external_void_func_v3f16_inreg: 921; GFX11: ; %bb.0: 922; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 923; GFX11-NEXT: s_mov_b32 s2, s33 924; GFX11-NEXT: s_mov_b32 s33, s32 925; GFX11-NEXT: s_or_saveexec_b32 s3, -1 926; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 927; GFX11-NEXT: s_mov_b32 exec_lo, s3 928; GFX11-NEXT: 
v_writelane_b32 v40, s2, 2 929; GFX11-NEXT: s_add_i32 s32, s32, 16 930; GFX11-NEXT: s_getpc_b64 s[2:3] 931; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16_inreg@rel32@lo+4 932; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16_inreg@rel32@hi+12 933; GFX11-NEXT: v_writelane_b32 v40, s30, 0 934; GFX11-NEXT: v_writelane_b32 v40, s31, 1 935; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 936; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 937; GFX11-NEXT: v_readlane_b32 s31, v40, 1 938; GFX11-NEXT: v_readlane_b32 s30, v40, 0 939; GFX11-NEXT: s_mov_b32 s32, s33 940; GFX11-NEXT: v_readlane_b32 s0, v40, 2 941; GFX11-NEXT: s_or_saveexec_b32 s1, -1 942; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 943; GFX11-NEXT: s_mov_b32 exec_lo, s1 944; GFX11-NEXT: s_mov_b32 s33, s0 945; GFX11-NEXT: s_waitcnt vmcnt(0) 946; GFX11-NEXT: s_setpc_b64 s[30:31] 947 call void @external_void_func_v3f16_inreg(<3 x half> inreg %arg) 948 ret void 949} 950 951define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 { 952; GFX9-LABEL: test_call_external_void_func_v4f16_inreg: 953; GFX9: ; %bb.0: 954; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 955; GFX9-NEXT: s_mov_b32 s18, s33 956; GFX9-NEXT: s_mov_b32 s33, s32 957; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 958; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 959; GFX9-NEXT: s_mov_b64 exec, s[20:21] 960; GFX9-NEXT: v_writelane_b32 v40, s18, 2 961; GFX9-NEXT: s_addk_i32 s32, 0x400 962; GFX9-NEXT: v_writelane_b32 v40, s30, 0 963; GFX9-NEXT: s_mov_b32 s1, s17 964; GFX9-NEXT: s_mov_b32 s0, s16 965; GFX9-NEXT: v_writelane_b32 v40, s31, 1 966; GFX9-NEXT: s_getpc_b64 s[18:19] 967; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v4f16_inreg@rel32@lo+4 968; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v4f16_inreg@rel32@hi+12 969; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] 970; GFX9-NEXT: v_readlane_b32 s31, v40, 1 971; GFX9-NEXT: v_readlane_b32 s30, v40, 0 972; 
GFX9-NEXT: s_mov_b32 s32, s33 973; GFX9-NEXT: v_readlane_b32 s4, v40, 2 974; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 975; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 976; GFX9-NEXT: s_mov_b64 exec, s[6:7] 977; GFX9-NEXT: s_mov_b32 s33, s4 978; GFX9-NEXT: s_waitcnt vmcnt(0) 979; GFX9-NEXT: s_setpc_b64 s[30:31] 980; 981; GFX11-LABEL: test_call_external_void_func_v4f16_inreg: 982; GFX11: ; %bb.0: 983; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 984; GFX11-NEXT: s_mov_b32 s2, s33 985; GFX11-NEXT: s_mov_b32 s33, s32 986; GFX11-NEXT: s_or_saveexec_b32 s3, -1 987; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 988; GFX11-NEXT: s_mov_b32 exec_lo, s3 989; GFX11-NEXT: v_writelane_b32 v40, s2, 2 990; GFX11-NEXT: s_add_i32 s32, s32, 16 991; GFX11-NEXT: s_getpc_b64 s[2:3] 992; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4f16_inreg@rel32@lo+4 993; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4f16_inreg@rel32@hi+12 994; GFX11-NEXT: v_writelane_b32 v40, s30, 0 995; GFX11-NEXT: v_writelane_b32 v40, s31, 1 996; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 997; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 998; GFX11-NEXT: v_readlane_b32 s31, v40, 1 999; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1000; GFX11-NEXT: s_mov_b32 s32, s33 1001; GFX11-NEXT: v_readlane_b32 s0, v40, 2 1002; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1003; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1004; GFX11-NEXT: s_mov_b32 exec_lo, s1 1005; GFX11-NEXT: s_mov_b32 s33, s0 1006; GFX11-NEXT: s_waitcnt vmcnt(0) 1007; GFX11-NEXT: s_setpc_b64 s[30:31] 1008 call void @external_void_func_v4f16_inreg(<4 x half> inreg %arg) 1009 ret void 1010} 1011 1012define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { 1013; GFX9-LABEL: test_call_external_void_func_p0_inreg: 1014; GFX9: ; %bb.0: 1015; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1016; GFX9-NEXT: s_mov_b32 s18, s33 1017; GFX9-NEXT: s_mov_b32 s33, s32 1018; GFX9-NEXT: 
s_or_saveexec_b64 s[20:21], -1 1019; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1020; GFX9-NEXT: s_mov_b64 exec, s[20:21] 1021; GFX9-NEXT: v_writelane_b32 v40, s18, 2 1022; GFX9-NEXT: s_addk_i32 s32, 0x400 1023; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1024; GFX9-NEXT: s_mov_b32 s1, s17 1025; GFX9-NEXT: s_mov_b32 s0, s16 1026; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1027; GFX9-NEXT: s_getpc_b64 s[18:19] 1028; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p0_inreg@rel32@lo+4 1029; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p0_inreg@rel32@hi+12 1030; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] 1031; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1032; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1033; GFX9-NEXT: s_mov_b32 s32, s33 1034; GFX9-NEXT: v_readlane_b32 s4, v40, 2 1035; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 1036; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1037; GFX9-NEXT: s_mov_b64 exec, s[6:7] 1038; GFX9-NEXT: s_mov_b32 s33, s4 1039; GFX9-NEXT: s_waitcnt vmcnt(0) 1040; GFX9-NEXT: s_setpc_b64 s[30:31] 1041; 1042; GFX11-LABEL: test_call_external_void_func_p0_inreg: 1043; GFX11: ; %bb.0: 1044; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1045; GFX11-NEXT: s_mov_b32 s2, s33 1046; GFX11-NEXT: s_mov_b32 s33, s32 1047; GFX11-NEXT: s_or_saveexec_b32 s3, -1 1048; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 1049; GFX11-NEXT: s_mov_b32 exec_lo, s3 1050; GFX11-NEXT: v_writelane_b32 v40, s2, 2 1051; GFX11-NEXT: s_add_i32 s32, s32, 16 1052; GFX11-NEXT: s_getpc_b64 s[2:3] 1053; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p0_inreg@rel32@lo+4 1054; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p0_inreg@rel32@hi+12 1055; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1056; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1057; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1058; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1059; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1060; GFX11-NEXT: v_readlane_b32 s30, 
v40, 0 1061; GFX11-NEXT: s_mov_b32 s32, s33 1062; GFX11-NEXT: v_readlane_b32 s0, v40, 2 1063; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1064; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1065; GFX11-NEXT: s_mov_b32 exec_lo, s1 1066; GFX11-NEXT: s_mov_b32 s33, s0 1067; GFX11-NEXT: s_waitcnt vmcnt(0) 1068; GFX11-NEXT: s_setpc_b64 s[30:31] 1069 call void @external_void_func_p0_inreg(ptr inreg %arg) 1070 ret void 1071} 1072 1073define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) #0 { 1074; GFX9-LABEL: test_call_external_void_func_p1_inreg: 1075; GFX9: ; %bb.0: 1076; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1077; GFX9-NEXT: s_mov_b32 s18, s33 1078; GFX9-NEXT: s_mov_b32 s33, s32 1079; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 1080; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1081; GFX9-NEXT: s_mov_b64 exec, s[20:21] 1082; GFX9-NEXT: v_writelane_b32 v40, s18, 2 1083; GFX9-NEXT: s_addk_i32 s32, 0x400 1084; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1085; GFX9-NEXT: s_mov_b32 s1, s17 1086; GFX9-NEXT: s_mov_b32 s0, s16 1087; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1088; GFX9-NEXT: s_getpc_b64 s[18:19] 1089; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p1_inreg@rel32@lo+4 1090; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p1_inreg@rel32@hi+12 1091; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] 1092; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1093; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1094; GFX9-NEXT: s_mov_b32 s32, s33 1095; GFX9-NEXT: v_readlane_b32 s4, v40, 2 1096; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 1097; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1098; GFX9-NEXT: s_mov_b64 exec, s[6:7] 1099; GFX9-NEXT: s_mov_b32 s33, s4 1100; GFX9-NEXT: s_waitcnt vmcnt(0) 1101; GFX9-NEXT: s_setpc_b64 s[30:31] 1102; 1103; GFX11-LABEL: test_call_external_void_func_p1_inreg: 1104; GFX11: ; %bb.0: 1105; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1106; GFX11-NEXT: 
s_mov_b32 s2, s33 1107; GFX11-NEXT: s_mov_b32 s33, s32 1108; GFX11-NEXT: s_or_saveexec_b32 s3, -1 1109; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 1110; GFX11-NEXT: s_mov_b32 exec_lo, s3 1111; GFX11-NEXT: v_writelane_b32 v40, s2, 2 1112; GFX11-NEXT: s_add_i32 s32, s32, 16 1113; GFX11-NEXT: s_getpc_b64 s[2:3] 1114; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p1_inreg@rel32@lo+4 1115; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p1_inreg@rel32@hi+12 1116; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1117; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1118; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1119; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1120; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1121; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1122; GFX11-NEXT: s_mov_b32 s32, s33 1123; GFX11-NEXT: v_readlane_b32 s0, v40, 2 1124; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1125; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1126; GFX11-NEXT: s_mov_b32 exec_lo, s1 1127; GFX11-NEXT: s_mov_b32 s33, s0 1128; GFX11-NEXT: s_waitcnt vmcnt(0) 1129; GFX11-NEXT: s_setpc_b64 s[30:31] 1130 call void @external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) 1131 ret void 1132} 1133 1134define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) #0 { 1135; GFX9-LABEL: test_call_external_void_func_p3_inreg: 1136; GFX9: ; %bb.0: 1137; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1138; GFX9-NEXT: s_mov_b32 s17, s33 1139; GFX9-NEXT: s_mov_b32 s33, s32 1140; GFX9-NEXT: s_or_saveexec_b64 s[18:19], -1 1141; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1142; GFX9-NEXT: s_mov_b64 exec, s[18:19] 1143; GFX9-NEXT: v_writelane_b32 v40, s17, 2 1144; GFX9-NEXT: s_addk_i32 s32, 0x400 1145; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1146; GFX9-NEXT: s_mov_b32 s0, s16 1147; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1148; GFX9-NEXT: s_getpc_b64 s[18:19] 1149; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_p3_inreg@rel32@lo+4 
1150; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_p3_inreg@rel32@hi+12 1151; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] 1152; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1153; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1154; GFX9-NEXT: s_mov_b32 s32, s33 1155; GFX9-NEXT: v_readlane_b32 s4, v40, 2 1156; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 1157; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1158; GFX9-NEXT: s_mov_b64 exec, s[6:7] 1159; GFX9-NEXT: s_mov_b32 s33, s4 1160; GFX9-NEXT: s_waitcnt vmcnt(0) 1161; GFX9-NEXT: s_setpc_b64 s[30:31] 1162; 1163; GFX11-LABEL: test_call_external_void_func_p3_inreg: 1164; GFX11: ; %bb.0: 1165; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1166; GFX11-NEXT: s_mov_b32 s1, s33 1167; GFX11-NEXT: s_mov_b32 s33, s32 1168; GFX11-NEXT: s_or_saveexec_b32 s2, -1 1169; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 1170; GFX11-NEXT: s_mov_b32 exec_lo, s2 1171; GFX11-NEXT: v_writelane_b32 v40, s1, 2 1172; GFX11-NEXT: s_add_i32 s32, s32, 16 1173; GFX11-NEXT: s_getpc_b64 s[2:3] 1174; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_p3_inreg@rel32@lo+4 1175; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_p3_inreg@rel32@hi+12 1176; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1177; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1178; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1179; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1180; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1181; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1182; GFX11-NEXT: s_mov_b32 s32, s33 1183; GFX11-NEXT: v_readlane_b32 s0, v40, 2 1184; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1185; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1186; GFX11-NEXT: s_mov_b32 exec_lo, s1 1187; GFX11-NEXT: s_mov_b32 s33, s0 1188; GFX11-NEXT: s_waitcnt vmcnt(0) 1189; GFX11-NEXT: s_setpc_b64 s[30:31] 1190 call void @external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) 1191 ret void 1192} 1193 1194define void 
@test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg) #0 { 1195; GFX9-LABEL: test_call_external_void_func_v2p1_inreg: 1196; GFX9: ; %bb.0: 1197; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1198; GFX9-NEXT: s_mov_b32 s20, s33 1199; GFX9-NEXT: s_mov_b32 s33, s32 1200; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1 1201; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1202; GFX9-NEXT: s_mov_b64 exec, s[22:23] 1203; GFX9-NEXT: v_writelane_b32 v40, s20, 2 1204; GFX9-NEXT: s_addk_i32 s32, 0x400 1205; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1206; GFX9-NEXT: s_mov_b32 s3, s19 1207; GFX9-NEXT: s_mov_b32 s2, s18 1208; GFX9-NEXT: s_mov_b32 s1, s17 1209; GFX9-NEXT: s_mov_b32 s0, s16 1210; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1211; GFX9-NEXT: s_getpc_b64 s[20:21] 1212; GFX9-NEXT: s_add_u32 s20, s20, external_void_func_v2p1_inreg@rel32@lo+4 1213; GFX9-NEXT: s_addc_u32 s21, s21, external_void_func_v2p1_inreg@rel32@hi+12 1214; GFX9-NEXT: s_swappc_b64 s[30:31], s[20:21] 1215; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1216; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1217; GFX9-NEXT: s_mov_b32 s32, s33 1218; GFX9-NEXT: v_readlane_b32 s4, v40, 2 1219; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 1220; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1221; GFX9-NEXT: s_mov_b64 exec, s[6:7] 1222; GFX9-NEXT: s_mov_b32 s33, s4 1223; GFX9-NEXT: s_waitcnt vmcnt(0) 1224; GFX9-NEXT: s_setpc_b64 s[30:31] 1225; 1226; GFX11-LABEL: test_call_external_void_func_v2p1_inreg: 1227; GFX11: ; %bb.0: 1228; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1229; GFX11-NEXT: s_mov_b32 s16, s33 1230; GFX11-NEXT: s_mov_b32 s33, s32 1231; GFX11-NEXT: s_or_saveexec_b32 s17, -1 1232; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 1233; GFX11-NEXT: s_mov_b32 exec_lo, s17 1234; GFX11-NEXT: v_writelane_b32 v40, s16, 2 1235; GFX11-NEXT: s_add_i32 s32, s32, 16 1236; GFX11-NEXT: s_getpc_b64 s[16:17] 1237; GFX11-NEXT: s_add_u32 s16, 
s16, external_void_func_v2p1_inreg@rel32@lo+4 1238; GFX11-NEXT: s_addc_u32 s17, s17, external_void_func_v2p1_inreg@rel32@hi+12 1239; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1240; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1241; GFX11-NEXT: s_swappc_b64 s[30:31], s[16:17] 1242; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1243; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1244; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1245; GFX11-NEXT: s_mov_b32 s32, s33 1246; GFX11-NEXT: v_readlane_b32 s0, v40, 2 1247; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1248; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1249; GFX11-NEXT: s_mov_b32 exec_lo, s1 1250; GFX11-NEXT: s_mov_b32 s33, s0 1251; GFX11-NEXT: s_waitcnt vmcnt(0) 1252; GFX11-NEXT: s_setpc_b64 s[30:31] 1253 call void @external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inreg %arg) 1254 ret void 1255} 1256 1257define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg) #0 { 1258; GFX9-LABEL: test_call_external_void_func_v2p5_inreg: 1259; GFX9: ; %bb.0: 1260; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1261; GFX9-NEXT: s_mov_b32 s18, s33 1262; GFX9-NEXT: s_mov_b32 s33, s32 1263; GFX9-NEXT: s_or_saveexec_b64 s[20:21], -1 1264; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1265; GFX9-NEXT: s_mov_b64 exec, s[20:21] 1266; GFX9-NEXT: v_writelane_b32 v40, s18, 2 1267; GFX9-NEXT: s_addk_i32 s32, 0x400 1268; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1269; GFX9-NEXT: s_mov_b32 s1, s17 1270; GFX9-NEXT: s_mov_b32 s0, s16 1271; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1272; GFX9-NEXT: s_getpc_b64 s[18:19] 1273; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2p5_inreg@rel32@lo+4 1274; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2p5_inreg@rel32@hi+12 1275; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19] 1276; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1277; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1278; GFX9-NEXT: s_mov_b32 s32, s33 1279; GFX9-NEXT: v_readlane_b32 s4, v40, 2 
1280; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 1281; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1282; GFX9-NEXT: s_mov_b64 exec, s[6:7] 1283; GFX9-NEXT: s_mov_b32 s33, s4 1284; GFX9-NEXT: s_waitcnt vmcnt(0) 1285; GFX9-NEXT: s_setpc_b64 s[30:31] 1286; 1287; GFX11-LABEL: test_call_external_void_func_v2p5_inreg: 1288; GFX11: ; %bb.0: 1289; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1290; GFX11-NEXT: s_mov_b32 s2, s33 1291; GFX11-NEXT: s_mov_b32 s33, s32 1292; GFX11-NEXT: s_or_saveexec_b32 s3, -1 1293; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 1294; GFX11-NEXT: s_mov_b32 exec_lo, s3 1295; GFX11-NEXT: v_writelane_b32 v40, s2, 2 1296; GFX11-NEXT: s_add_i32 s32, s32, 16 1297; GFX11-NEXT: s_getpc_b64 s[2:3] 1298; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2p5_inreg@rel32@lo+4 1299; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2p5_inreg@rel32@hi+12 1300; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1301; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1302; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1303; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1304; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1305; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1306; GFX11-NEXT: s_mov_b32 s32, s33 1307; GFX11-NEXT: v_readlane_b32 s0, v40, 2 1308; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1309; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1310; GFX11-NEXT: s_mov_b32 exec_lo, s1 1311; GFX11-NEXT: s_mov_b32 s33, s0 1312; GFX11-NEXT: s_waitcnt vmcnt(0) 1313; GFX11-NEXT: s_setpc_b64 s[30:31] 1314 call void @external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inreg %arg) 1315 ret void 1316} 1317 1318define void @test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2) #0 { 1319; GFX9-LABEL: test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg: 1320; GFX9: ; %bb.0: 1321; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1322; GFX9-NEXT: s_mov_b32 s21, s33 1323; 
GFX9-NEXT: s_mov_b32 s33, s32 1324; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1 1325; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1326; GFX9-NEXT: s_mov_b64 exec, s[22:23] 1327; GFX9-NEXT: v_writelane_b32 v40, s21, 2 1328; GFX9-NEXT: s_addk_i32 s32, 0x400 1329; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1330; GFX9-NEXT: s_mov_b32 s3, s19 1331; GFX9-NEXT: s_mov_b32 s2, s18 1332; GFX9-NEXT: s_mov_b32 s1, s17 1333; GFX9-NEXT: s_mov_b32 s0, s16 1334; GFX9-NEXT: s_mov_b32 s16, s20 1335; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1336; GFX9-NEXT: s_getpc_b64 s[22:23] 1337; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4 1338; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12 1339; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23] 1340; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1341; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1342; GFX9-NEXT: s_mov_b32 s32, s33 1343; GFX9-NEXT: v_readlane_b32 s4, v40, 2 1344; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 1345; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1346; GFX9-NEXT: s_mov_b64 exec, s[6:7] 1347; GFX9-NEXT: s_mov_b32 s33, s4 1348; GFX9-NEXT: s_waitcnt vmcnt(0) 1349; GFX9-NEXT: s_setpc_b64 s[30:31] 1350; 1351; GFX11-LABEL: test_call_external_void_func_i64_inreg_i32_inreg_i64_inreg: 1352; GFX11: ; %bb.0: 1353; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1354; GFX11-NEXT: s_mov_b32 s17, s33 1355; GFX11-NEXT: s_mov_b32 s33, s32 1356; GFX11-NEXT: s_or_saveexec_b32 s18, -1 1357; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 1358; GFX11-NEXT: s_mov_b32 exec_lo, s18 1359; GFX11-NEXT: v_writelane_b32 v40, s17, 2 1360; GFX11-NEXT: s_add_i32 s32, s32, 16 1361; GFX11-NEXT: s_getpc_b64 s[18:19] 1362; GFX11-NEXT: s_add_u32 s18, s18, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@lo+4 1363; GFX11-NEXT: s_addc_u32 s19, s19, external_void_func_i64_inreg_i32_inreg_i64_inreg@rel32@hi+12 
1364; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1365; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1366; GFX11-NEXT: s_swappc_b64 s[30:31], s[18:19] 1367; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1368; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1369; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1370; GFX11-NEXT: s_mov_b32 s32, s33 1371; GFX11-NEXT: v_readlane_b32 s0, v40, 2 1372; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1373; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1374; GFX11-NEXT: s_mov_b32 exec_lo, s1 1375; GFX11-NEXT: s_mov_b32 s33, s0 1376; GFX11-NEXT: s_waitcnt vmcnt(0) 1377; GFX11-NEXT: s_setpc_b64 s[30:31] 1378 call void @external_void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2) 1379 ret void 1380} 1381 1382define void @test_call_external_void_func_a15i32_inreg([13 x i32] inreg %arg0) #0 { 1383; GFX9-LABEL: test_call_external_void_func_a15i32_inreg: 1384; GFX9: ; %bb.0: 1385; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1386; GFX9-NEXT: s_mov_b32 s29, s33 1387; GFX9-NEXT: s_mov_b32 s33, s32 1388; GFX9-NEXT: s_or_saveexec_b64 vcc, -1 1389; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1390; GFX9-NEXT: s_mov_b64 exec, vcc 1391; GFX9-NEXT: v_writelane_b32 v40, s29, 2 1392; GFX9-NEXT: s_addk_i32 s32, 0x400 1393; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1394; GFX9-NEXT: s_mov_b32 s3, s19 1395; GFX9-NEXT: s_mov_b32 s2, s18 1396; GFX9-NEXT: s_mov_b32 s1, s17 1397; GFX9-NEXT: s_mov_b32 s0, s16 1398; GFX9-NEXT: s_mov_b32 s16, s20 1399; GFX9-NEXT: s_mov_b32 s17, s21 1400; GFX9-NEXT: s_mov_b32 s18, s22 1401; GFX9-NEXT: s_mov_b32 s19, s23 1402; GFX9-NEXT: s_mov_b32 s20, s24 1403; GFX9-NEXT: s_mov_b32 s21, s25 1404; GFX9-NEXT: s_mov_b32 s22, s26 1405; GFX9-NEXT: s_mov_b32 s23, s27 1406; GFX9-NEXT: s_mov_b32 s24, s28 1407; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1408; GFX9-NEXT: s_getpc_b64 vcc 1409; GFX9-NEXT: s_add_u32 vcc_lo, vcc_lo, external_void_func_a15i32_inreg@rel32@lo+4 1410; 
GFX9-NEXT: s_addc_u32 vcc_hi, vcc_hi, external_void_func_a15i32_inreg@rel32@hi+12 1411; GFX9-NEXT: s_swappc_b64 s[30:31], vcc 1412; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1413; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1414; GFX9-NEXT: s_mov_b32 s32, s33 1415; GFX9-NEXT: v_readlane_b32 s4, v40, 2 1416; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 1417; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1418; GFX9-NEXT: s_mov_b64 exec, s[6:7] 1419; GFX9-NEXT: s_mov_b32 s33, s4 1420; GFX9-NEXT: s_waitcnt vmcnt(0) 1421; GFX9-NEXT: s_setpc_b64 s[30:31] 1422; 1423; GFX11-LABEL: test_call_external_void_func_a15i32_inreg: 1424; GFX11: ; %bb.0: 1425; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1426; GFX11-NEXT: s_mov_b32 s25, s33 1427; GFX11-NEXT: s_mov_b32 s33, s32 1428; GFX11-NEXT: s_or_saveexec_b32 s26, -1 1429; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 1430; GFX11-NEXT: s_mov_b32 exec_lo, s26 1431; GFX11-NEXT: v_writelane_b32 v40, s25, 2 1432; GFX11-NEXT: s_add_i32 s32, s32, 16 1433; GFX11-NEXT: s_getpc_b64 s[26:27] 1434; GFX11-NEXT: s_add_u32 s26, s26, external_void_func_a15i32_inreg@rel32@lo+4 1435; GFX11-NEXT: s_addc_u32 s27, s27, external_void_func_a15i32_inreg@rel32@hi+12 1436; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1437; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1438; GFX11-NEXT: s_swappc_b64 s[30:31], s[26:27] 1439; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1440; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1441; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1442; GFX11-NEXT: s_mov_b32 s32, s33 1443; GFX11-NEXT: v_readlane_b32 s0, v40, 2 1444; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1445; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1446; GFX11-NEXT: s_mov_b32 exec_lo, s1 1447; GFX11-NEXT: s_mov_b32 s33, s0 1448; GFX11-NEXT: s_waitcnt vmcnt(0) 1449; GFX11-NEXT: s_setpc_b64 s[30:31] 1450 call void @external_void_func_a15i32_inreg([13 x i32] inreg %arg0) 1451 ret void 1452} 1453 1454 1455; FIXME: This should also 
fail 1456define void @test_call_external_void_func_a15i32_inreg_i32_inreg([13 x i32] inreg %arg0, i32 inreg %arg1) #1 { 1457; GFX9-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg: 1458; GFX9: ; %bb.0: 1459; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1460; GFX9-NEXT: s_mov_b32 s21, s33 1461; GFX9-NEXT: s_mov_b32 s33, s32 1462; GFX9-NEXT: s_or_saveexec_b64 s[22:23], -1 1463; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 1464; GFX9-NEXT: s_mov_b64 exec, s[22:23] 1465; GFX9-NEXT: v_writelane_b32 v40, s21, 2 1466; GFX9-NEXT: s_addk_i32 s32, 0x400 1467; GFX9-NEXT: v_writelane_b32 v40, s30, 0 1468; GFX9-NEXT: s_mov_b32 s3, s7 1469; GFX9-NEXT: s_mov_b32 s2, s6 1470; GFX9-NEXT: s_mov_b32 s1, s5 1471; GFX9-NEXT: s_mov_b32 s0, s4 1472; GFX9-NEXT: s_mov_b32 s4, s8 1473; GFX9-NEXT: s_mov_b32 s5, s9 1474; GFX9-NEXT: s_mov_b32 s6, s10 1475; GFX9-NEXT: s_mov_b32 s7, s11 1476; GFX9-NEXT: s_mov_b32 s8, s15 1477; GFX9-NEXT: s_mov_b32 s9, s16 1478; GFX9-NEXT: s_mov_b32 s10, s17 1479; GFX9-NEXT: s_mov_b32 s11, s18 1480; GFX9-NEXT: s_mov_b32 s15, s19 1481; GFX9-NEXT: s_mov_b32 s16, s20 1482; GFX9-NEXT: v_writelane_b32 v40, s31, 1 1483; GFX9-NEXT: s_getpc_b64 s[22:23] 1484; GFX9-NEXT: s_add_u32 s22, s22, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 1485; GFX9-NEXT: s_addc_u32 s23, s23, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 1486; GFX9-NEXT: s_swappc_b64 s[30:31], s[22:23] 1487; GFX9-NEXT: v_readlane_b32 s31, v40, 1 1488; GFX9-NEXT: v_readlane_b32 s30, v40, 0 1489; GFX9-NEXT: s_mov_b32 s32, s33 1490; GFX9-NEXT: v_readlane_b32 s4, v40, 2 1491; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 1492; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 1493; GFX9-NEXT: s_mov_b64 exec, s[6:7] 1494; GFX9-NEXT: s_mov_b32 s33, s4 1495; GFX9-NEXT: s_waitcnt vmcnt(0) 1496; GFX9-NEXT: s_setpc_b64 s[30:31] 1497; 1498; GFX11-LABEL: test_call_external_void_func_a15i32_inreg_i32_inreg: 1499; GFX11: 
; %bb.0: 1500; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1501; GFX11-NEXT: s_mov_b32 s17, s33 1502; GFX11-NEXT: s_mov_b32 s33, s32 1503; GFX11-NEXT: s_or_saveexec_b32 s18, -1 1504; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 1505; GFX11-NEXT: s_mov_b32 exec_lo, s18 1506; GFX11-NEXT: v_writelane_b32 v40, s17, 2 1507; GFX11-NEXT: s_add_i32 s32, s32, 16 1508; GFX11-NEXT: s_getpc_b64 s[18:19] 1509; GFX11-NEXT: s_add_u32 s18, s18, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@lo+4 1510; GFX11-NEXT: s_addc_u32 s19, s19, external_void_func_a15i32_inreg_i32_inreg__noimplicit@rel32@hi+12 1511; GFX11-NEXT: v_writelane_b32 v40, s30, 0 1512; GFX11-NEXT: v_writelane_b32 v40, s31, 1 1513; GFX11-NEXT: s_swappc_b64 s[30:31], s[18:19] 1514; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1515; GFX11-NEXT: v_readlane_b32 s31, v40, 1 1516; GFX11-NEXT: v_readlane_b32 s30, v40, 0 1517; GFX11-NEXT: s_mov_b32 s32, s33 1518; GFX11-NEXT: v_readlane_b32 s0, v40, 2 1519; GFX11-NEXT: s_or_saveexec_b32 s1, -1 1520; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 1521; GFX11-NEXT: s_mov_b32 exec_lo, s1 1522; GFX11-NEXT: s_mov_b32 s33, s0 1523; GFX11-NEXT: s_waitcnt vmcnt(0) 1524; GFX11-NEXT: s_setpc_b64 s[30:31] 1525 call void @external_void_func_a15i32_inreg_i32_inreg__noimplicit([13 x i32] inreg %arg0, i32 inreg %arg1) 1526 ret void 1527} 1528 1529attributes #0 = { nounwind } 1530attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" } 1531 1532!llvm.module.flags = !{!0} 1533!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} 1534