1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 2; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s 3; RUN: llc -mtriple=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CI %s 4; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s 5; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s 6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=HSA %s 7 8declare hidden void @external_void_func_i1(i1) #0 9declare hidden void @external_void_func_i1_signext(i1 signext) #0 10declare hidden void @external_void_func_i1_zeroext(i1 zeroext) #0 11 12declare hidden void @external_void_func_i8(i8) #0 13declare hidden void @external_void_func_i8_signext(i8 signext) #0 14declare hidden void @external_void_func_i8_zeroext(i8 zeroext) #0 15 16declare hidden void @external_void_func_i16(i16) #0 17declare hidden void @external_void_func_i16_signext(i16 signext) #0 18declare hidden void @external_void_func_i16_zeroext(i16 zeroext) #0 19 20declare hidden void @external_void_func_i32(i32) #0 21declare hidden void @external_void_func_i64(i64) #0 22declare hidden void @external_void_func_v2i64(<2 x i64>) #0 23declare hidden void @external_void_func_v3i64(<3 x i64>) #0 24declare hidden void @external_void_func_v4i64(<4 x i64>) #0 25 26declare hidden void @external_void_func_f16(half) #0 27declare hidden void @external_void_func_f32(float) #0 28declare hidden void @external_void_func_f64(double) #0 29declare hidden void @external_void_func_v2f32(<2 x float>) #0 30declare hidden void @external_void_func_v2f64(<2 x double>) #0 31declare hidden void @external_void_func_v3f32(<3 x float>) #0 32declare hidden void @external_void_func_v3f64(<3 x double>) #0 33declare hidden void @external_void_func_v5f32(<5 x float>) #0 34 35declare hidden void @external_void_func_v2i16(<2 x i16>) #0 36declare hidden void @external_void_func_v2f16(<2 x half>) #0 37declare hidden void @external_void_func_v3i16(<3 x i16>) #0 38declare hidden void @external_void_func_v3f16(<3 x half>) #0 39declare hidden void @external_void_func_v4i16(<4 x i16>) #0 40declare hidden void @external_void_func_v4f16(<4 x half>) #0 41 42declare hidden void @external_void_func_v2i32(<2 x i32>) #0 43declare hidden void @external_void_func_v3i32(<3 x i32>) #0 44declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) #0 45declare hidden void @external_void_func_v4i32(<4 x i32>) #0 46declare hidden void @external_void_func_v5i32(<5 x i32>) #0 47declare hidden void @external_void_func_v8i32(<8 x i32>) #0 48declare hidden void @external_void_func_v16i32(<16 x i32>) #0 49declare hidden void @external_void_func_v32i32(<32 x i32>) #0 50declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0 51 52; return value and argument 53declare hidden i32 @external_i32_func_i32(i32) #0 54 55; Structs 56declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0 57declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0 58declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0 59 60declare hidden void @external_void_func_v16i8(<16 x i8>) #0 61 62; FIXME: Should be passing -1 63define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { 64; VI-LABEL: test_call_external_void_func_i1_imm: 65; VI: ; %bb.0: 66; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 67; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 68; VI-NEXT: s_mov_b32 s38, -1 69; VI-NEXT: s_mov_b32 s39, 0xe80000 70; VI-NEXT: s_add_u32 s36, s36, s3 71; VI-NEXT: s_addc_u32 s37, s37, 0 72; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 73; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 74; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 75; VI-NEXT: v_mov_b32_e32 v0, 1 76; VI-NEXT: s_mov_b32 s32, 0 77; VI-NEXT: s_getpc_b64 s[4:5] 78; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 79; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 80; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 81; VI-NEXT: s_endpgm 82; 83; CI-LABEL: test_call_external_void_func_i1_imm: 84; CI: ; %bb.0: 85; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 86; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 87; CI-NEXT: s_mov_b32 s38, -1 88; CI-NEXT: s_mov_b32 s39, 0xe8f000 89; CI-NEXT: s_add_u32 s36, s36, s3 90; CI-NEXT: s_addc_u32 s37, s37, 0 91; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 92; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 93; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 94; CI-NEXT: v_mov_b32_e32 v0, 1 95; CI-NEXT: s_mov_b32 s32, 0 96; CI-NEXT: s_getpc_b64 s[4:5] 97; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 98; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 99; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 100; CI-NEXT: s_endpgm 101; 102; GFX9-LABEL: test_call_external_void_func_i1_imm: 103; GFX9: ; %bb.0: 104; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 105; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 106; GFX9-NEXT: s_mov_b32 s38, -1 107; GFX9-NEXT: s_mov_b32 s39, 0xe00000 108; GFX9-NEXT: s_add_u32 s36, s36, s3 109; GFX9-NEXT: s_addc_u32 s37, s37, 0 110; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 111; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 112; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 113; GFX9-NEXT: v_mov_b32_e32 v0, 1 114; GFX9-NEXT: s_mov_b32 s32, 0 115; GFX9-NEXT: s_getpc_b64 s[4:5] 116; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4 117; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12 118; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 119; GFX9-NEXT: s_endpgm 120; 121; GFX11-LABEL: test_call_external_void_func_i1_imm: 122; GFX11: ; %bb.0: 123; GFX11-NEXT: v_mov_b32_e32 v0, 1 124; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 125; GFX11-NEXT: s_mov_b32 s32, 0 126; GFX11-NEXT: s_getpc_b64 s[2:3] 127; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1@rel32@lo+4 128; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1@rel32@hi+12 129; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 130; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 131; GFX11-NEXT: s_endpgm 132; 133; HSA-LABEL: test_call_external_void_func_i1_imm: 134; HSA: ; %bb.0: 135; HSA-NEXT: s_add_i32 s6, s6, s9 136; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 137; HSA-NEXT: s_add_u32 s0, s0, s9 138; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 139; HSA-NEXT: s_addc_u32 s1, s1, 0 140; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 141; HSA-NEXT: v_mov_b32_e32 v0, 1 142; HSA-NEXT: s_mov_b32 s32, 0 143; HSA-NEXT: s_getpc_b64 s[8:9] 144; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1@rel32@lo+4 145; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1@rel32@hi+12 146; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 147; HSA-NEXT: s_endpgm 148 call void @external_void_func_i1(i1 true) 149 ret void 150} 151 152define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { 153; VI-LABEL: test_call_external_void_func_i1_signext: 154; VI: ; %bb.0: 155; VI-NEXT: s_mov_b32 s3, 0xf000 156; VI-NEXT: s_mov_b32 s2, -1 157; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc 158; VI-NEXT: s_waitcnt vmcnt(0) 159; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 160; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 161; VI-NEXT: s_mov_b32 s38, -1 162; VI-NEXT: s_mov_b32 s39, 0xe80000 163; VI-NEXT: s_add_u32 s36, s36, s5 164; VI-NEXT: s_addc_u32 s37, s37, 0 165; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 166; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 167; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 168; VI-NEXT: s_mov_b32 s32, 0 169; VI-NEXT: s_getpc_b64 s[4:5] 170; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 171; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 172; VI-NEXT: v_bfe_i32 v0, v0, 0, 1 173; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 174; VI-NEXT: s_endpgm 175; 176; CI-LABEL: test_call_external_void_func_i1_signext: 177; CI: ; %bb.0: 178; CI-NEXT: s_mov_b32 s3, 0xf000 179; CI-NEXT: s_mov_b32 s2, -1 180; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc 181; CI-NEXT: s_waitcnt vmcnt(0) 182; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 183; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 184; CI-NEXT: s_mov_b32 s38, -1 185; CI-NEXT: s_mov_b32 s39, 0xe8f000 186; CI-NEXT: s_add_u32 s36, s36, s5 187; CI-NEXT: s_addc_u32 s37, s37, 0 188; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 189; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 190; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 191; CI-NEXT: s_mov_b32 s32, 0 192; CI-NEXT: s_getpc_b64 s[4:5] 193; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 194; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 195; CI-NEXT: v_bfe_i32 v0, v0, 0, 1 196; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 197; CI-NEXT: s_endpgm 198; 199; GFX9-LABEL: test_call_external_void_func_i1_signext: 200; GFX9: ; %bb.0: 201; GFX9-NEXT: s_mov_b32 s3, 0xf000 202; GFX9-NEXT: s_mov_b32 s2, -1 203; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc 204; GFX9-NEXT: s_waitcnt vmcnt(0) 205; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 206; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 207; GFX9-NEXT: s_mov_b32 s38, -1 208; GFX9-NEXT: s_mov_b32 s39, 0xe00000 209; GFX9-NEXT: s_add_u32 s36, s36, s5 210; GFX9-NEXT: s_addc_u32 s37, s37, 0 211; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 212; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 213; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 214; GFX9-NEXT: s_mov_b32 s32, 0 215; GFX9-NEXT: s_getpc_b64 s[4:5] 216; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4 217; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12 218; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1 219; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 220; GFX9-NEXT: s_endpgm 221; 222; GFX11-LABEL: test_call_external_void_func_i1_signext: 223; GFX11: ; %bb.0: 224; GFX11-NEXT: s_mov_b32 s3, 0x31016000 225; GFX11-NEXT: s_mov_b32 s2, -1 226; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 227; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc 228; GFX11-NEXT: s_waitcnt vmcnt(0) 229; GFX11-NEXT: s_mov_b32 s32, 0 230; GFX11-NEXT: s_getpc_b64 s[2:3] 231; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1_signext@rel32@lo+4 232; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1_signext@rel32@hi+12 233; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 1 234; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 235; GFX11-NEXT: s_endpgm 236; 237; HSA-LABEL: test_call_external_void_func_i1_signext: 238; HSA: ; %bb.0: 239; HSA-NEXT: s_mov_b32 s7, 0x1100f000 240; HSA-NEXT: s_mov_b32 s6, -1 241; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc 242; HSA-NEXT: s_waitcnt vmcnt(0) 243; HSA-NEXT: s_add_i32 s8, s8, s11 244; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 245; HSA-NEXT: s_add_u32 s0, s0, s11 246; HSA-NEXT: s_addc_u32 s1, s1, 0 247; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 248; HSA-NEXT: s_mov_b32 s32, 0 249; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 250; HSA-NEXT: s_getpc_b64 s[8:9] 251; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1_signext@rel32@lo+4 252; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1_signext@rel32@hi+12 253; HSA-NEXT: v_bfe_i32 v0, v0, 0, 1 254; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 255; HSA-NEXT: s_endpgm 256 %var = load volatile i1, ptr addrspace(1) undef 257 call void @external_void_func_i1_signext(i1 signext %var) 258 ret void 259} 260 261; FIXME: load should be scheduled before getpc 262define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { 263; VI-LABEL: test_call_external_void_func_i1_zeroext: 264; VI: ; %bb.0: 265; VI-NEXT: s_mov_b32 s3, 0xf000 266; VI-NEXT: s_mov_b32 s2, -1 267; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc 268; VI-NEXT: s_waitcnt vmcnt(0) 269; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 270; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 271; VI-NEXT: s_mov_b32 s38, -1 272; VI-NEXT: s_mov_b32 s39, 0xe80000 273; VI-NEXT: s_add_u32 s36, s36, s5 274; VI-NEXT: s_addc_u32 s37, s37, 0 275; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 276; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 277; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 278; VI-NEXT: s_mov_b32 s32, 0 279; VI-NEXT: s_getpc_b64 s[4:5] 280; VI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 281; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 282; VI-NEXT: v_and_b32_e32 v0, 1, v0 283; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 284; VI-NEXT: s_endpgm 285; 286; CI-LABEL: test_call_external_void_func_i1_zeroext: 287; CI: ; %bb.0: 288; CI-NEXT: s_mov_b32 s3, 0xf000 289; CI-NEXT: s_mov_b32 s2, -1 290; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc 291; CI-NEXT: s_waitcnt vmcnt(0) 292; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 293; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 294; CI-NEXT: s_mov_b32 s38, -1 295; CI-NEXT: s_mov_b32 s39, 0xe8f000 296; CI-NEXT: s_add_u32 s36, s36, s5 297; CI-NEXT: s_addc_u32 s37, s37, 0 298; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 299; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 300; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 301; CI-NEXT: s_mov_b32 s32, 0 302; CI-NEXT: s_getpc_b64 s[4:5] 303; CI-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 304; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 305; CI-NEXT: v_and_b32_e32 v0, 1, v0 306; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 307; CI-NEXT: s_endpgm 308; 309; GFX9-LABEL: test_call_external_void_func_i1_zeroext: 310; GFX9: ; %bb.0: 311; GFX9-NEXT: s_mov_b32 s3, 0xf000 312; GFX9-NEXT: s_mov_b32 s2, -1 313; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc 314; GFX9-NEXT: s_waitcnt vmcnt(0) 315; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 316; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 317; GFX9-NEXT: s_mov_b32 s38, -1 318; GFX9-NEXT: s_mov_b32 s39, 0xe00000 319; GFX9-NEXT: s_add_u32 s36, s36, s5 320; GFX9-NEXT: s_addc_u32 s37, s37, 0 321; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 322; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 323; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 324; GFX9-NEXT: s_mov_b32 s32, 0 325; GFX9-NEXT: s_getpc_b64 s[4:5] 326; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4 327; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12 328; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 329; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 330; GFX9-NEXT: s_endpgm 331; 332; GFX11-LABEL: test_call_external_void_func_i1_zeroext: 333; GFX11: ; %bb.0: 334; GFX11-NEXT: s_mov_b32 s3, 0x31016000 335; GFX11-NEXT: s_mov_b32 s2, -1 336; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 337; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc 338; GFX11-NEXT: s_waitcnt vmcnt(0) 339; GFX11-NEXT: s_mov_b32 s32, 0 340; GFX11-NEXT: s_getpc_b64 s[2:3] 341; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i1_zeroext@rel32@lo+4 342; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i1_zeroext@rel32@hi+12 343; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 344; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 345; GFX11-NEXT: s_endpgm 346; 347; HSA-LABEL: test_call_external_void_func_i1_zeroext: 348; HSA: ; %bb.0: 349; HSA-NEXT: s_mov_b32 s7, 0x1100f000 350; HSA-NEXT: s_mov_b32 s6, -1 351; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc 352; HSA-NEXT: s_waitcnt vmcnt(0) 353; HSA-NEXT: s_add_i32 s8, s8, s11 354; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 355; HSA-NEXT: s_add_u32 s0, s0, s11 356; HSA-NEXT: s_addc_u32 s1, s1, 0 357; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 358; HSA-NEXT: s_mov_b32 s32, 0 359; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 360; HSA-NEXT: s_getpc_b64 s[8:9] 361; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i1_zeroext@rel32@lo+4 362; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i1_zeroext@rel32@hi+12 363; HSA-NEXT: v_and_b32_e32 v0, 1, v0 364; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 365; HSA-NEXT: s_endpgm 366 %var = load volatile i1, ptr addrspace(1) undef 367 call void @external_void_func_i1_zeroext(i1 zeroext %var) 368 ret void 369} 370 371define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { 372; VI-LABEL: test_call_external_void_func_i8_imm: 373; VI: ; %bb.0: 374; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 375; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 376; VI-NEXT: s_mov_b32 s38, -1 377; VI-NEXT: s_mov_b32 s39, 0xe80000 378; VI-NEXT: s_add_u32 s36, s36, s5 379; VI-NEXT: s_addc_u32 s37, s37, 0 380; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 381; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 382; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 383; VI-NEXT: v_mov_b32_e32 v0, 0x7b 384; VI-NEXT: s_mov_b32 s32, 0 385; VI-NEXT: s_getpc_b64 s[4:5] 386; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 387; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 388; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 389; VI-NEXT: s_endpgm 390; 391; CI-LABEL: test_call_external_void_func_i8_imm: 392; CI: ; %bb.0: 393; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 394; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 395; CI-NEXT: s_mov_b32 s38, -1 396; CI-NEXT: s_mov_b32 s39, 0xe8f000 397; CI-NEXT: s_add_u32 s36, s36, s5 398; CI-NEXT: s_addc_u32 s37, s37, 0 399; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 400; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 401; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 402; CI-NEXT: v_mov_b32_e32 v0, 0x7b 403; CI-NEXT: s_mov_b32 s32, 0 404; CI-NEXT: s_getpc_b64 s[4:5] 405; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 406; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 407; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 408; CI-NEXT: s_endpgm 409; 410; GFX9-LABEL: test_call_external_void_func_i8_imm: 411; GFX9: ; %bb.0: 412; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 413; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 414; GFX9-NEXT: s_mov_b32 s38, -1 415; GFX9-NEXT: s_mov_b32 s39, 0xe00000 416; GFX9-NEXT: s_add_u32 s36, s36, s5 417; GFX9-NEXT: s_addc_u32 s37, s37, 0 418; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 419; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 420; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 421; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b 422; GFX9-NEXT: s_mov_b32 s32, 0 423; GFX9-NEXT: s_getpc_b64 s[4:5] 424; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4 425; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12 426; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 427; GFX9-NEXT: s_endpgm 428; 429; GFX11-LABEL: test_call_external_void_func_i8_imm: 430; GFX11: ; %bb.0: 431; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b 432; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 433; GFX11-NEXT: s_mov_b32 s32, 0 434; GFX11-NEXT: s_getpc_b64 s[2:3] 435; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4 436; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12 437; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 438; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 439; GFX11-NEXT: s_endpgm 440; 441; HSA-LABEL: test_call_external_void_func_i8_imm: 442; HSA: ; %bb.0: 443; HSA-NEXT: s_add_i32 s8, s8, s11 444; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 445; HSA-NEXT: s_add_u32 s0, s0, s11 446; HSA-NEXT: s_addc_u32 s1, s1, 0 447; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 448; HSA-NEXT: v_mov_b32_e32 v0, 0x7b 449; HSA-NEXT: s_mov_b32 s32, 0 450; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 451; HSA-NEXT: s_getpc_b64 s[8:9] 452; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8@rel32@lo+4 453; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8@rel32@hi+12 454; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 455; HSA-NEXT: s_endpgm 456 call void @external_void_func_i8(i8 123) 457 ret void 458} 459 460; FIXME: don't wait before call 461define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { 462; VI-LABEL: test_call_external_void_func_i8_signext: 463; VI: ; %bb.0: 464; VI-NEXT: s_mov_b32 s3, 0xf000 465; VI-NEXT: s_mov_b32 s2, -1 466; VI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc 467; VI-NEXT: s_waitcnt vmcnt(0) 468; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 469; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 470; VI-NEXT: s_mov_b32 s38, -1 471; VI-NEXT: s_mov_b32 s39, 0xe80000 472; VI-NEXT: s_add_u32 s36, s36, s5 473; VI-NEXT: s_addc_u32 s37, s37, 0 474; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 475; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 476; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 477; VI-NEXT: s_mov_b32 s32, 0 478; VI-NEXT: s_getpc_b64 s[4:5] 479; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 480; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 481; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 482; VI-NEXT: s_endpgm 483; 484; CI-LABEL: test_call_external_void_func_i8_signext: 485; CI: ; %bb.0: 486; CI-NEXT: s_mov_b32 s3, 0xf000 487; CI-NEXT: s_mov_b32 s2, -1 488; CI-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc 489; CI-NEXT: s_waitcnt vmcnt(0) 490; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 491; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 492; CI-NEXT: s_mov_b32 s38, -1 493; CI-NEXT: s_mov_b32 s39, 0xe8f000 494; CI-NEXT: s_add_u32 s36, s36, s5 495; CI-NEXT: s_addc_u32 s37, s37, 0 496; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 497; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 498; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 499; CI-NEXT: s_mov_b32 s32, 0 500; CI-NEXT: s_getpc_b64 s[4:5] 501; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 502; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 503; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 504; CI-NEXT: s_endpgm 505; 506; GFX9-LABEL: test_call_external_void_func_i8_signext: 507; GFX9: ; %bb.0: 508; GFX9-NEXT: s_mov_b32 s3, 0xf000 509; GFX9-NEXT: s_mov_b32 s2, -1 510; GFX9-NEXT: buffer_load_sbyte v0, off, s[0:3], 0 glc 511; GFX9-NEXT: s_waitcnt vmcnt(0) 512; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 513; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 514; GFX9-NEXT: s_mov_b32 s38, -1 515; GFX9-NEXT: s_mov_b32 s39, 0xe00000 516; GFX9-NEXT: s_add_u32 s36, s36, s5 517; GFX9-NEXT: s_addc_u32 s37, s37, 0 518; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 519; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 520; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 521; GFX9-NEXT: s_mov_b32 s32, 0 522; GFX9-NEXT: s_getpc_b64 s[4:5] 523; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4 524; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12 525; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 526; GFX9-NEXT: s_endpgm 527; 528; GFX11-LABEL: test_call_external_void_func_i8_signext: 529; GFX11: ; %bb.0: 530; GFX11-NEXT: s_mov_b32 s3, 0x31016000 531; GFX11-NEXT: s_mov_b32 s2, -1 532; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 533; GFX11-NEXT: buffer_load_i8 v0, off, s[0:3], 0 glc dlc 534; GFX11-NEXT: s_waitcnt vmcnt(0) 535; GFX11-NEXT: s_mov_b32 s32, 0 536; GFX11-NEXT: s_getpc_b64 s[2:3] 537; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8_signext@rel32@lo+4 538; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8_signext@rel32@hi+12 539; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 540; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 541; GFX11-NEXT: s_endpgm 542; 543; HSA-LABEL: test_call_external_void_func_i8_signext: 544; HSA: ; %bb.0: 545; HSA-NEXT: s_mov_b32 s7, 0x1100f000 546; HSA-NEXT: s_mov_b32 s6, -1 547; HSA-NEXT: buffer_load_sbyte v0, off, s[4:7], 0 glc 548; HSA-NEXT: s_waitcnt vmcnt(0) 549; HSA-NEXT: s_add_i32 s8, s8, s11 550; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 551; HSA-NEXT: s_add_u32 s0, s0, s11 552; HSA-NEXT: s_addc_u32 s1, s1, 0 553; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 554; HSA-NEXT: s_mov_b32 s32, 0 555; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 556; HSA-NEXT: s_getpc_b64 s[8:9] 557; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8_signext@rel32@lo+4 558; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8_signext@rel32@hi+12 559; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 560; HSA-NEXT: s_endpgm 561 %var = load volatile i8, ptr addrspace(1) undef 562 call void @external_void_func_i8_signext(i8 signext %var) 563 ret void 564} 565 566define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { 567; VI-LABEL: test_call_external_void_func_i8_zeroext: 568; VI: ; %bb.0: 569; VI-NEXT: s_mov_b32 s3, 0xf000 570; VI-NEXT: s_mov_b32 s2, -1 571; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc 572; VI-NEXT: s_waitcnt vmcnt(0) 573; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 574; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 575; VI-NEXT: s_mov_b32 s38, -1 576; VI-NEXT: s_mov_b32 s39, 0xe80000 577; VI-NEXT: s_add_u32 s36, s36, s5 578; VI-NEXT: s_addc_u32 s37, s37, 0 579; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 580; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 581; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 582; VI-NEXT: s_mov_b32 s32, 0 583; VI-NEXT: s_getpc_b64 s[4:5] 584; VI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 585; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 586; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 587; VI-NEXT: s_endpgm 588; 589; CI-LABEL: test_call_external_void_func_i8_zeroext: 590; CI: ; %bb.0: 591; CI-NEXT: s_mov_b32 s3, 0xf000 592; CI-NEXT: s_mov_b32 s2, -1 593; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc 594; CI-NEXT: s_waitcnt vmcnt(0) 595; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 596; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 597; CI-NEXT: s_mov_b32 s38, -1 598; CI-NEXT: s_mov_b32 s39, 0xe8f000 599; CI-NEXT: s_add_u32 s36, s36, s5 600; CI-NEXT: s_addc_u32 s37, s37, 0 601; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 602; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 603; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 604; CI-NEXT: s_mov_b32 s32, 0 605; CI-NEXT: s_getpc_b64 s[4:5] 606; CI-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 607; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 608; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 609; CI-NEXT: s_endpgm 610; 611; GFX9-LABEL: test_call_external_void_func_i8_zeroext: 612; GFX9: ; %bb.0: 613; GFX9-NEXT: s_mov_b32 s3, 0xf000 614; GFX9-NEXT: s_mov_b32 s2, -1 615; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 glc 616; GFX9-NEXT: s_waitcnt vmcnt(0) 617; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 618; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 619; GFX9-NEXT: s_mov_b32 s38, -1 620; GFX9-NEXT: s_mov_b32 s39, 0xe00000 621; GFX9-NEXT: s_add_u32 s36, s36, s5 622; GFX9-NEXT: s_addc_u32 s37, s37, 0 623; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 624; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 625; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 626; GFX9-NEXT: s_mov_b32 s32, 0 627; GFX9-NEXT: s_getpc_b64 s[4:5] 628; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4 629; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12 630; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 631; GFX9-NEXT: s_endpgm 632; 633; GFX11-LABEL: test_call_external_void_func_i8_zeroext: 634; GFX11: ; %bb.0: 635; GFX11-NEXT: s_mov_b32 s3, 0x31016000 636; GFX11-NEXT: s_mov_b32 s2, -1 637; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 638; GFX11-NEXT: buffer_load_u8 v0, off, s[0:3], 0 glc dlc 639; GFX11-NEXT: s_waitcnt vmcnt(0) 640; GFX11-NEXT: s_mov_b32 s32, 0 641; GFX11-NEXT: s_getpc_b64 s[2:3] 642; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i8_zeroext@rel32@lo+4 643; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i8_zeroext@rel32@hi+12 644; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 645; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 646; GFX11-NEXT: s_endpgm 647; 648; HSA-LABEL: test_call_external_void_func_i8_zeroext: 649; HSA: ; %bb.0: 650; HSA-NEXT: s_mov_b32 s7, 0x1100f000 651; HSA-NEXT: s_mov_b32 s6, -1 652; HSA-NEXT: buffer_load_ubyte v0, off, s[4:7], 0 glc 653; HSA-NEXT: s_waitcnt vmcnt(0) 654; HSA-NEXT: s_add_i32 s8, s8, s11 655; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 656; HSA-NEXT: s_add_u32 s0, s0, s11 657; HSA-NEXT: s_addc_u32 s1, s1, 0 658; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 659; HSA-NEXT: s_mov_b32 s32, 0 660; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 661; HSA-NEXT: s_getpc_b64 s[8:9] 662; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i8_zeroext@rel32@lo+4 663; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i8_zeroext@rel32@hi+12 664; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 665; HSA-NEXT: s_endpgm 666 %var = load volatile i8, ptr addrspace(1) undef 667 call void @external_void_func_i8_zeroext(i8 zeroext %var) 668 ret void 669} 670 671define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { 672; VI-LABEL: test_call_external_void_func_i16_imm: 673; VI: ; %bb.0: 674; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 675; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 676; VI-NEXT: s_mov_b32 s38, -1 677; VI-NEXT: s_mov_b32 s39, 0xe80000 678; VI-NEXT: s_add_u32 s36, s36, s3 679; VI-NEXT: s_addc_u32 s37, s37, 0 680; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 681; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 682; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 683; VI-NEXT: v_mov_b32_e32 v0, 0x7b 684; VI-NEXT: s_mov_b32 s32, 0 685; VI-NEXT: s_getpc_b64 s[4:5] 686; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 687; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 688; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 689; VI-NEXT: s_endpgm 690; 691; CI-LABEL: test_call_external_void_func_i16_imm: 692; CI: ; %bb.0: 693; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 694; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 695; CI-NEXT: s_mov_b32 s38, -1 696; CI-NEXT: s_mov_b32 s39, 0xe8f000 697; CI-NEXT: s_add_u32 s36, s36, s3 698; CI-NEXT: s_addc_u32 s37, s37, 0 699; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 700; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 701; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 702; CI-NEXT: v_mov_b32_e32 v0, 0x7b 703; CI-NEXT: s_mov_b32 s32, 0 704; CI-NEXT: s_getpc_b64 s[4:5] 705; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 706; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 707; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 708; CI-NEXT: s_endpgm 709; 710; GFX9-LABEL: test_call_external_void_func_i16_imm: 711; GFX9: ; %bb.0: 712; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 713; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 714; GFX9-NEXT: s_mov_b32 s38, -1 715; GFX9-NEXT: s_mov_b32 s39, 0xe00000 716; GFX9-NEXT: s_add_u32 s36, s36, s3 717; GFX9-NEXT: s_addc_u32 s37, s37, 0 718; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 719; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 720; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 721; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b 722; GFX9-NEXT: s_mov_b32 s32, 0 723; GFX9-NEXT: s_getpc_b64 s[4:5] 724; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4 725; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12 726; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 727; GFX9-NEXT: s_endpgm 728; 729; GFX11-LABEL: test_call_external_void_func_i16_imm: 730; GFX11: ; %bb.0: 731; GFX11-NEXT: v_mov_b32_e32 v0, 0x7b 732; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 733; GFX11-NEXT: s_mov_b32 s32, 0 734; GFX11-NEXT: s_getpc_b64 s[2:3] 735; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4 736; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12 737; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 738; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 739; GFX11-NEXT: s_endpgm 740; 741; HSA-LABEL: test_call_external_void_func_i16_imm: 742; HSA: ; %bb.0: 743; HSA-NEXT: s_add_i32 s6, s6, s9 744; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 745; HSA-NEXT: s_add_u32 s0, s0, s9 746; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 747; HSA-NEXT: s_addc_u32 s1, s1, 0 748; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 749; HSA-NEXT: v_mov_b32_e32 v0, 0x7b 750; HSA-NEXT: s_mov_b32 s32, 0 751; HSA-NEXT: s_getpc_b64 s[8:9] 752; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16@rel32@lo+4 753; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16@rel32@hi+12 754; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 755; HSA-NEXT: s_endpgm 756 call void @external_void_func_i16(i16 123) 757 ret void 758} 759 760define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { 761; VI-LABEL: test_call_external_void_func_i16_signext: 762; VI: ; %bb.0: 763; VI-NEXT: s_mov_b32 s3, 0xf000 764; VI-NEXT: s_mov_b32 s2, -1 765; VI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc 766; VI-NEXT: s_waitcnt vmcnt(0) 767; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 768; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 769; VI-NEXT: s_mov_b32 s38, -1 770; VI-NEXT: s_mov_b32 s39, 0xe80000 771; VI-NEXT: s_add_u32 s36, s36, s5 772; VI-NEXT: s_addc_u32 s37, s37, 0 773; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 774; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 775; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 776; VI-NEXT: s_mov_b32 s32, 0 777; VI-NEXT: s_getpc_b64 s[4:5] 778; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 779; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 780; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 781; VI-NEXT: s_endpgm 782; 783; CI-LABEL: test_call_external_void_func_i16_signext: 784; CI: ; %bb.0: 785; CI-NEXT: s_mov_b32 s3, 0xf000 786; CI-NEXT: s_mov_b32 s2, -1 787; CI-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc 788; CI-NEXT: s_waitcnt vmcnt(0) 789; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 790; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 791; CI-NEXT: s_mov_b32 s38, -1 792; CI-NEXT: s_mov_b32 s39, 0xe8f000 793; CI-NEXT: s_add_u32 s36, s36, s5 794; CI-NEXT: s_addc_u32 s37, s37, 0 795; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 796; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 797; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 798; CI-NEXT: s_mov_b32 s32, 0 799; CI-NEXT: s_getpc_b64 s[4:5] 800; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 801; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 802; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 803; CI-NEXT: s_endpgm 804; 805; GFX9-LABEL: test_call_external_void_func_i16_signext: 806; GFX9: ; %bb.0: 807; GFX9-NEXT: s_mov_b32 s3, 0xf000 808; GFX9-NEXT: s_mov_b32 s2, -1 809; GFX9-NEXT: buffer_load_sshort v0, off, s[0:3], 0 glc 810; GFX9-NEXT: s_waitcnt vmcnt(0) 811; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 812; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 813; GFX9-NEXT: s_mov_b32 s38, -1 814; GFX9-NEXT: s_mov_b32 s39, 0xe00000 815; GFX9-NEXT: s_add_u32 s36, s36, s5 816; GFX9-NEXT: s_addc_u32 s37, s37, 0 817; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 818; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 819; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 820; GFX9-NEXT: s_mov_b32 s32, 0 821; GFX9-NEXT: s_getpc_b64 s[4:5] 822; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4 823; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12 824; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 825; GFX9-NEXT: s_endpgm 826; 827; GFX11-LABEL: test_call_external_void_func_i16_signext: 828; GFX11: ; %bb.0: 829; GFX11-NEXT: s_mov_b32 s3, 0x31016000 830; GFX11-NEXT: s_mov_b32 s2, -1 831; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 832; GFX11-NEXT: buffer_load_i16 v0, off, s[0:3], 0 glc dlc 833; GFX11-NEXT: s_waitcnt vmcnt(0) 834; GFX11-NEXT: s_mov_b32 s32, 0 835; GFX11-NEXT: s_getpc_b64 s[2:3] 836; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16_signext@rel32@lo+4 837; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16_signext@rel32@hi+12 838; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 839; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 840; GFX11-NEXT: s_endpgm 841; 842; HSA-LABEL: test_call_external_void_func_i16_signext: 843; HSA: ; %bb.0: 844; HSA-NEXT: s_mov_b32 s7, 0x1100f000 845; HSA-NEXT: s_mov_b32 s6, -1 846; HSA-NEXT: buffer_load_sshort v0, off, s[4:7], 0 glc 847; HSA-NEXT: s_waitcnt vmcnt(0) 848; HSA-NEXT: s_add_i32 s8, s8, s11 849; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 850; HSA-NEXT: s_add_u32 s0, s0, s11 851; HSA-NEXT: s_addc_u32 s1, s1, 0 852; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 853; HSA-NEXT: s_mov_b32 s32, 0 854; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 855; HSA-NEXT: s_getpc_b64 s[8:9] 856; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16_signext@rel32@lo+4 857; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16_signext@rel32@hi+12 858; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 859; HSA-NEXT: s_endpgm 860 %var = load volatile i16, ptr addrspace(1) undef 861 call void @external_void_func_i16_signext(i16 signext %var) 862 ret void 863} 864 865define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { 866; VI-LABEL: test_call_external_void_func_i16_zeroext: 867; VI: ; %bb.0: 868; VI-NEXT: s_mov_b32 s3, 0xf000 869; VI-NEXT: s_mov_b32 s2, -1 870; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc 871; VI-NEXT: s_waitcnt vmcnt(0) 872; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 873; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 874; VI-NEXT: s_mov_b32 s38, -1 875; VI-NEXT: s_mov_b32 s39, 0xe80000 876; VI-NEXT: s_add_u32 s36, s36, s5 877; VI-NEXT: s_addc_u32 s37, s37, 0 878; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 879; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 880; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 881; VI-NEXT: s_mov_b32 s32, 0 882; VI-NEXT: s_getpc_b64 s[4:5] 883; VI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 884; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 885; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 886; VI-NEXT: s_endpgm 887; 888; CI-LABEL: test_call_external_void_func_i16_zeroext: 889; CI: ; %bb.0: 890; CI-NEXT: s_mov_b32 s3, 0xf000 891; CI-NEXT: s_mov_b32 s2, -1 892; CI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc 893; CI-NEXT: s_waitcnt vmcnt(0) 894; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 895; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 896; CI-NEXT: s_mov_b32 s38, -1 897; CI-NEXT: s_mov_b32 s39, 0xe8f000 898; CI-NEXT: s_add_u32 s36, s36, s5 899; CI-NEXT: s_addc_u32 s37, s37, 0 900; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 901; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 902; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 903; CI-NEXT: s_mov_b32 s32, 0 904; CI-NEXT: s_getpc_b64 s[4:5] 905; CI-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 906; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 907; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 908; CI-NEXT: s_endpgm 909; 910; GFX9-LABEL: test_call_external_void_func_i16_zeroext: 911; GFX9: ; %bb.0: 912; GFX9-NEXT: s_mov_b32 s3, 0xf000 913; GFX9-NEXT: s_mov_b32 s2, -1 914; GFX9-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc 915; GFX9-NEXT: s_waitcnt vmcnt(0) 916; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 917; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 918; GFX9-NEXT: s_mov_b32 s38, -1 919; GFX9-NEXT: s_mov_b32 s39, 0xe00000 920; GFX9-NEXT: s_add_u32 s36, s36, s5 921; GFX9-NEXT: s_addc_u32 s37, s37, 0 922; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 923; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 924; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 925; GFX9-NEXT: s_mov_b32 s32, 0 926; GFX9-NEXT: s_getpc_b64 s[4:5] 927; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4 928; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12 929; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 930; GFX9-NEXT: s_endpgm 931; 932; GFX11-LABEL: test_call_external_void_func_i16_zeroext: 933; GFX11: ; %bb.0: 934; GFX11-NEXT: s_mov_b32 s3, 0x31016000 935; GFX11-NEXT: s_mov_b32 s2, -1 936; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 937; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0 glc dlc 938; GFX11-NEXT: s_waitcnt vmcnt(0) 939; GFX11-NEXT: s_mov_b32 s32, 0 940; GFX11-NEXT: s_getpc_b64 s[2:3] 941; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i16_zeroext@rel32@lo+4 942; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i16_zeroext@rel32@hi+12 943; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 944; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 945; GFX11-NEXT: s_endpgm 946; 947; HSA-LABEL: test_call_external_void_func_i16_zeroext: 948; HSA: ; %bb.0: 949; HSA-NEXT: s_mov_b32 s7, 0x1100f000 950; HSA-NEXT: s_mov_b32 s6, -1 951; HSA-NEXT: buffer_load_ushort v0, off, s[4:7], 0 glc 952; HSA-NEXT: s_waitcnt vmcnt(0) 953; HSA-NEXT: s_add_i32 s8, s8, s11 954; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 955; HSA-NEXT: s_add_u32 s0, s0, s11 956; HSA-NEXT: s_addc_u32 s1, s1, 0 957; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 958; HSA-NEXT: s_mov_b32 s32, 0 959; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 960; HSA-NEXT: s_getpc_b64 s[8:9] 961; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i16_zeroext@rel32@lo+4 962; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i16_zeroext@rel32@hi+12 963; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 964; HSA-NEXT: s_endpgm 965 %var = load volatile i16, ptr addrspace(1) undef 966 call void @external_void_func_i16_zeroext(i16 zeroext %var) 967 ret void 968} 969 970define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { 971; VI-LABEL: test_call_external_void_func_i32_imm: 972; VI: ; %bb.0: 973; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 974; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 975; VI-NEXT: s_mov_b32 s38, -1 976; VI-NEXT: s_mov_b32 s39, 0xe80000 977; VI-NEXT: s_add_u32 s36, s36, s5 978; VI-NEXT: s_addc_u32 s37, s37, 0 979; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 980; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 981; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 982; VI-NEXT: v_mov_b32_e32 v0, 42 983; VI-NEXT: s_mov_b32 s32, 0 984; VI-NEXT: s_getpc_b64 s[4:5] 985; VI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 986; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 987; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 988; VI-NEXT: s_endpgm 989; 990; CI-LABEL: test_call_external_void_func_i32_imm: 991; CI: ; %bb.0: 992; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 993; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 994; CI-NEXT: s_mov_b32 s38, -1 995; CI-NEXT: s_mov_b32 s39, 0xe8f000 996; CI-NEXT: s_add_u32 s36, s36, s5 997; CI-NEXT: s_addc_u32 s37, s37, 0 998; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 999; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 1000; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 1001; CI-NEXT: v_mov_b32_e32 v0, 42 1002; CI-NEXT: s_mov_b32 s32, 0 1003; CI-NEXT: s_getpc_b64 s[4:5] 1004; CI-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 1005; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 1006; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1007; CI-NEXT: s_endpgm 1008; 1009; GFX9-LABEL: test_call_external_void_func_i32_imm: 1010; GFX9: ; %bb.0: 1011; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1012; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1013; GFX9-NEXT: s_mov_b32 s38, -1 1014; GFX9-NEXT: s_mov_b32 s39, 0xe00000 1015; GFX9-NEXT: s_add_u32 s36, s36, s5 1016; GFX9-NEXT: s_addc_u32 s37, s37, 0 1017; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 1018; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 1019; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 1020; GFX9-NEXT: v_mov_b32_e32 v0, 42 1021; GFX9-NEXT: s_mov_b32 s32, 0 1022; GFX9-NEXT: s_getpc_b64 s[4:5] 1023; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4 1024; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12 1025; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 1026; GFX9-NEXT: s_endpgm 1027; 1028; GFX11-LABEL: test_call_external_void_func_i32_imm: 1029; GFX11: ; %bb.0: 1030; GFX11-NEXT: v_mov_b32_e32 v0, 42 1031; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 1032; GFX11-NEXT: s_mov_b32 s32, 0 1033; GFX11-NEXT: s_getpc_b64 s[2:3] 1034; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i32@rel32@lo+4 1035; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i32@rel32@hi+12 1036; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1037; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1038; GFX11-NEXT: s_endpgm 1039; 1040; HSA-LABEL: test_call_external_void_func_i32_imm: 1041; HSA: ; %bb.0: 1042; HSA-NEXT: s_add_i32 s8, s8, s11 1043; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 1044; HSA-NEXT: s_add_u32 s0, s0, s11 1045; HSA-NEXT: s_addc_u32 s1, s1, 0 1046; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 1047; HSA-NEXT: v_mov_b32_e32 v0, 42 1048; HSA-NEXT: s_mov_b32 s32, 0 1049; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 1050; HSA-NEXT: s_getpc_b64 s[8:9] 1051; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i32@rel32@lo+4 1052; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i32@rel32@hi+12 1053; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 1054; HSA-NEXT: s_endpgm 1055 call void @external_void_func_i32(i32 42) 1056 ret void 1057} 1058 1059define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { 1060; VI-LABEL: test_call_external_void_func_i64_imm: 1061; VI: ; %bb.0: 1062; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1063; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1064; VI-NEXT: s_mov_b32 s38, -1 1065; VI-NEXT: s_mov_b32 s39, 0xe80000 1066; VI-NEXT: s_add_u32 s36, s36, s3 1067; VI-NEXT: s_addc_u32 s37, s37, 0 1068; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 1069; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 1070; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 1071; VI-NEXT: v_mov_b32_e32 v0, 0x7b 1072; VI-NEXT: v_mov_b32_e32 v1, 0 1073; VI-NEXT: s_mov_b32 s32, 0 1074; VI-NEXT: s_getpc_b64 s[4:5] 1075; VI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 1076; VI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 1077; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1078; VI-NEXT: s_endpgm 1079; 1080; CI-LABEL: test_call_external_void_func_i64_imm: 1081; CI: ; %bb.0: 1082; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1083; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1084; CI-NEXT: s_mov_b32 s38, -1 1085; CI-NEXT: s_mov_b32 s39, 0xe8f000 1086; CI-NEXT: s_add_u32 s36, s36, s3 1087; CI-NEXT: s_addc_u32 s37, s37, 0 1088; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 1089; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 1090; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 1091; CI-NEXT: v_mov_b32_e32 v0, 0x7b 1092; CI-NEXT: v_mov_b32_e32 v1, 0 1093; CI-NEXT: s_mov_b32 s32, 0 1094; CI-NEXT: s_getpc_b64 s[4:5] 1095; CI-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 1096; CI-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 1097; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1098; CI-NEXT: s_endpgm 1099; 1100; GFX9-LABEL: test_call_external_void_func_i64_imm: 1101; GFX9: ; %bb.0: 1102; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1103; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1104; GFX9-NEXT: s_mov_b32 s38, -1 1105; GFX9-NEXT: s_mov_b32 s39, 0xe00000 1106; GFX9-NEXT: s_add_u32 s36, s36, s3 1107; GFX9-NEXT: s_addc_u32 s37, s37, 0 1108; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 1109; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 1110; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 1111; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b 1112; GFX9-NEXT: v_mov_b32_e32 v1, 0 1113; GFX9-NEXT: s_mov_b32 s32, 0 1114; GFX9-NEXT: s_getpc_b64 s[4:5] 1115; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4 1116; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12 1117; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 1118; GFX9-NEXT: s_endpgm 1119; 1120; GFX11-LABEL: test_call_external_void_func_i64_imm: 1121; GFX11: ; %bb.0: 1122; GFX11-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 1123; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 1124; GFX11-NEXT: s_mov_b32 s32, 0 1125; GFX11-NEXT: s_getpc_b64 s[2:3] 1126; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_i64@rel32@lo+4 1127; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_i64@rel32@hi+12 1128; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1129; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1130; GFX11-NEXT: s_endpgm 1131; 1132; HSA-LABEL: test_call_external_void_func_i64_imm: 1133; HSA: ; %bb.0: 1134; HSA-NEXT: s_add_i32 s6, s6, s9 1135; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 1136; HSA-NEXT: s_add_u32 s0, s0, s9 1137; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 1138; HSA-NEXT: s_addc_u32 s1, s1, 0 1139; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 1140; HSA-NEXT: v_mov_b32_e32 v0, 0x7b 1141; HSA-NEXT: v_mov_b32_e32 v1, 0 1142; HSA-NEXT: s_mov_b32 s32, 0 1143; HSA-NEXT: s_getpc_b64 s[8:9] 1144; HSA-NEXT: s_add_u32 s8, s8, external_void_func_i64@rel32@lo+4 1145; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_i64@rel32@hi+12 1146; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 1147; HSA-NEXT: s_endpgm 1148 call void @external_void_func_i64(i64 123) 1149 ret void 1150} 1151 1152define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { 1153; VI-LABEL: test_call_external_void_func_v2i64: 1154; VI: ; %bb.0: 1155; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1156; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1157; VI-NEXT: s_mov_b32 s38, -1 1158; VI-NEXT: s_mov_b32 s39, 0xe80000 1159; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 1160; VI-NEXT: s_mov_b32 s0, 0 1161; VI-NEXT: s_add_u32 s36, s36, s3 1162; VI-NEXT: s_mov_b32 s3, 0xf000 1163; VI-NEXT: s_mov_b32 s2, -1 1164; VI-NEXT: s_mov_b32 s1, s0 1165; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 1166; VI-NEXT: s_addc_u32 s37, s37, 0 1167; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 1168; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 1169; VI-NEXT: s_mov_b32 s32, 0 1170; VI-NEXT: s_getpc_b64 s[4:5] 1171; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 1172; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 1173; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1174; VI-NEXT: s_endpgm 1175; 1176; CI-LABEL: test_call_external_void_func_v2i64: 1177; CI: ; %bb.0: 1178; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1179; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1180; CI-NEXT: s_mov_b32 s38, -1 1181; CI-NEXT: s_mov_b32 s39, 0xe8f000 1182; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 1183; CI-NEXT: s_mov_b32 s0, 0 1184; CI-NEXT: s_add_u32 s36, s36, s3 1185; CI-NEXT: s_mov_b32 s3, 0xf000 1186; CI-NEXT: s_mov_b32 s2, -1 1187; CI-NEXT: s_mov_b32 s1, s0 1188; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 1189; CI-NEXT: s_addc_u32 s37, s37, 0 1190; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 1191; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 1192; CI-NEXT: s_mov_b32 s32, 0 1193; CI-NEXT: s_getpc_b64 s[4:5] 1194; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 1195; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 1196; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1197; CI-NEXT: s_endpgm 1198; 1199; GFX9-LABEL: test_call_external_void_func_v2i64: 1200; GFX9: ; %bb.0: 1201; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1202; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1203; GFX9-NEXT: s_mov_b32 s38, -1 1204; GFX9-NEXT: s_mov_b32 s39, 0xe00000 1205; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 1206; GFX9-NEXT: s_mov_b32 s0, 0 1207; GFX9-NEXT: s_add_u32 s36, s36, s3 1208; GFX9-NEXT: s_mov_b32 s3, 0xf000 1209; GFX9-NEXT: s_mov_b32 s2, -1 1210; GFX9-NEXT: s_mov_b32 s1, s0 1211; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 1212; GFX9-NEXT: s_addc_u32 s37, s37, 0 1213; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 1214; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 1215; GFX9-NEXT: s_mov_b32 s32, 0 1216; GFX9-NEXT: s_getpc_b64 s[4:5] 1217; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 1218; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 1219; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 1220; GFX9-NEXT: s_endpgm 1221; 1222; GFX11-LABEL: test_call_external_void_func_v2i64: 1223; GFX11: ; %bb.0: 1224; GFX11-NEXT: s_mov_b32 s4, 0 1225; GFX11-NEXT: s_mov_b32 s7, 0x31016000 1226; GFX11-NEXT: s_mov_b32 s6, -1 1227; GFX11-NEXT: s_mov_b32 s5, s4 1228; GFX11-NEXT: s_mov_b32 s32, 0 1229; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 1230; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 1231; GFX11-NEXT: s_getpc_b64 s[2:3] 1232; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4 1233; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12 1234; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1235; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1236; GFX11-NEXT: s_endpgm 1237; 1238; HSA-LABEL: test_call_external_void_func_v2i64: 1239; HSA: ; %bb.0: 1240; HSA-NEXT: s_add_i32 s6, s6, s9 1241; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 1242; HSA-NEXT: s_mov_b32 s8, 0 1243; HSA-NEXT: s_add_u32 s0, s0, s9 1244; HSA-NEXT: s_mov_b32 s11, 0x1100f000 1245; HSA-NEXT: s_mov_b32 s10, -1 1246; HSA-NEXT: s_mov_b32 s9, s8 1247; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1248; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 1249; HSA-NEXT: s_addc_u32 s1, s1, 0 1250; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 1251; HSA-NEXT: s_mov_b32 s32, 0 1252; HSA-NEXT: s_getpc_b64 s[8:9] 1253; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i64@rel32@lo+4 1254; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64@rel32@hi+12 1255; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 1256; HSA-NEXT: s_endpgm 1257 %val = load <2 x i64>, ptr addrspace(1) null 1258 call void @external_void_func_v2i64(<2 x i64> %val) 1259 ret void 1260} 1261 1262define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { 1263; VI-LABEL: test_call_external_void_func_v2i64_imm: 1264; VI: ; %bb.0: 1265; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1266; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1267; VI-NEXT: s_mov_b32 s38, -1 1268; VI-NEXT: s_mov_b32 s39, 0xe80000 1269; VI-NEXT: s_add_u32 s36, s36, s3 1270; VI-NEXT: s_addc_u32 s37, s37, 0 1271; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 1272; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 1273; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 1274; VI-NEXT: v_mov_b32_e32 v0, 1 1275; VI-NEXT: v_mov_b32_e32 v1, 2 1276; VI-NEXT: v_mov_b32_e32 v2, 3 1277; VI-NEXT: v_mov_b32_e32 v3, 4 1278; VI-NEXT: s_mov_b32 s32, 0 1279; VI-NEXT: s_getpc_b64 s[4:5] 1280; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 1281; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 1282; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1283; VI-NEXT: s_endpgm 1284; 1285; CI-LABEL: test_call_external_void_func_v2i64_imm: 1286; CI: ; %bb.0: 1287; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1288; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1289; CI-NEXT: s_mov_b32 s38, -1 1290; CI-NEXT: s_mov_b32 s39, 0xe8f000 1291; CI-NEXT: s_add_u32 s36, s36, s3 1292; CI-NEXT: s_addc_u32 s37, s37, 0 1293; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 1294; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 1295; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 1296; CI-NEXT: v_mov_b32_e32 v0, 1 1297; CI-NEXT: v_mov_b32_e32 v1, 2 1298; CI-NEXT: v_mov_b32_e32 v2, 3 1299; CI-NEXT: v_mov_b32_e32 v3, 4 1300; CI-NEXT: s_mov_b32 s32, 0 1301; CI-NEXT: s_getpc_b64 s[4:5] 1302; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 1303; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 1304; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1305; CI-NEXT: s_endpgm 1306; 1307; GFX9-LABEL: test_call_external_void_func_v2i64_imm: 1308; GFX9: ; %bb.0: 1309; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1310; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1311; GFX9-NEXT: s_mov_b32 s38, -1 1312; GFX9-NEXT: s_mov_b32 s39, 0xe00000 1313; GFX9-NEXT: s_add_u32 s36, s36, s3 1314; GFX9-NEXT: s_addc_u32 s37, s37, 0 1315; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 1316; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 1317; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 1318; GFX9-NEXT: v_mov_b32_e32 v0, 1 1319; GFX9-NEXT: v_mov_b32_e32 v1, 2 1320; GFX9-NEXT: v_mov_b32_e32 v2, 3 1321; GFX9-NEXT: v_mov_b32_e32 v3, 4 1322; GFX9-NEXT: s_mov_b32 s32, 0 1323; GFX9-NEXT: s_getpc_b64 s[4:5] 1324; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4 1325; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12 1326; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 1327; GFX9-NEXT: s_endpgm 1328; 1329; GFX11-LABEL: test_call_external_void_func_v2i64_imm: 1330; GFX11: ; %bb.0: 1331; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 1332; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 1333; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 1334; GFX11-NEXT: s_mov_b32 s32, 0 1335; GFX11-NEXT: s_getpc_b64 s[2:3] 1336; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4 1337; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12 1338; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1339; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1340; GFX11-NEXT: s_endpgm 1341; 1342; HSA-LABEL: test_call_external_void_func_v2i64_imm: 1343; HSA: ; %bb.0: 1344; HSA-NEXT: s_add_i32 s6, s6, s9 1345; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 1346; HSA-NEXT: s_add_u32 s0, s0, s9 1347; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 1348; HSA-NEXT: s_addc_u32 s1, s1, 0 1349; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 1350; HSA-NEXT: v_mov_b32_e32 v0, 1 1351; HSA-NEXT: v_mov_b32_e32 v1, 2 1352; HSA-NEXT: v_mov_b32_e32 v2, 3 1353; HSA-NEXT: v_mov_b32_e32 v3, 4 1354; HSA-NEXT: s_mov_b32 s32, 0 1355; HSA-NEXT: s_getpc_b64 s[8:9] 1356; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i64@rel32@lo+4 1357; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i64@rel32@hi+12 1358; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 1359; HSA-NEXT: s_endpgm 1360 call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>) 1361 ret void 1362} 1363 1364define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { 1365; VI-LABEL: test_call_external_void_func_v3i64: 1366; VI: ; %bb.0: 1367; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1368; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1369; VI-NEXT: s_mov_b32 s38, -1 1370; VI-NEXT: s_mov_b32 s39, 0xe80000 1371; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 1372; VI-NEXT: s_mov_b32 s0, 0 1373; VI-NEXT: s_add_u32 s36, s36, s3 1374; VI-NEXT: s_mov_b32 s3, 0xf000 1375; VI-NEXT: s_mov_b32 s2, -1 1376; VI-NEXT: s_mov_b32 s1, s0 1377; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 1378; VI-NEXT: s_addc_u32 s37, s37, 0 1379; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 1380; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 1381; VI-NEXT: v_mov_b32_e32 v4, 1 1382; VI-NEXT: v_mov_b32_e32 v5, 2 1383; VI-NEXT: s_mov_b32 s32, 0 1384; VI-NEXT: s_getpc_b64 s[4:5] 1385; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 1386; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 1387; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1388; VI-NEXT: s_endpgm 1389; 1390; CI-LABEL: test_call_external_void_func_v3i64: 1391; CI: ; %bb.0: 1392; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1393; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1394; CI-NEXT: s_mov_b32 s38, -1 1395; CI-NEXT: s_mov_b32 s39, 0xe8f000 1396; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 1397; CI-NEXT: s_mov_b32 s0, 0 1398; CI-NEXT: s_add_u32 s36, s36, s3 1399; CI-NEXT: s_mov_b32 s3, 0xf000 1400; CI-NEXT: s_mov_b32 s2, -1 1401; CI-NEXT: s_mov_b32 s1, s0 1402; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 1403; CI-NEXT: s_addc_u32 s37, s37, 0 1404; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 1405; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 1406; CI-NEXT: v_mov_b32_e32 v4, 1 1407; CI-NEXT: v_mov_b32_e32 v5, 2 1408; CI-NEXT: s_mov_b32 s32, 0 1409; CI-NEXT: s_getpc_b64 s[4:5] 1410; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 1411; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 1412; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1413; CI-NEXT: s_endpgm 1414; 1415; GFX9-LABEL: test_call_external_void_func_v3i64: 1416; GFX9: ; %bb.0: 1417; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1418; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1419; GFX9-NEXT: s_mov_b32 s38, -1 1420; GFX9-NEXT: s_mov_b32 s39, 0xe00000 1421; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 1422; GFX9-NEXT: s_mov_b32 s0, 0 1423; GFX9-NEXT: s_add_u32 s36, s36, s3 1424; GFX9-NEXT: s_mov_b32 s3, 0xf000 1425; GFX9-NEXT: s_mov_b32 s2, -1 1426; GFX9-NEXT: s_mov_b32 s1, s0 1427; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 1428; GFX9-NEXT: s_addc_u32 s37, s37, 0 1429; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 1430; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 1431; GFX9-NEXT: v_mov_b32_e32 v4, 1 1432; GFX9-NEXT: v_mov_b32_e32 v5, 2 1433; GFX9-NEXT: s_mov_b32 s32, 0 1434; GFX9-NEXT: s_getpc_b64 s[4:5] 1435; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4 1436; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12 1437; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 1438; GFX9-NEXT: s_endpgm 1439; 1440; GFX11-LABEL: test_call_external_void_func_v3i64: 1441; GFX11: ; %bb.0: 1442; GFX11-NEXT: s_mov_b32 s4, 0 1443; GFX11-NEXT: s_mov_b32 s7, 0x31016000 1444; GFX11-NEXT: s_mov_b32 s6, -1 1445; GFX11-NEXT: s_mov_b32 s5, s4 1446; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2 1447; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 1448; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 1449; GFX11-NEXT: s_mov_b32 s32, 0 1450; GFX11-NEXT: s_getpc_b64 s[2:3] 1451; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i64@rel32@lo+4 1452; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i64@rel32@hi+12 1453; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1454; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1455; GFX11-NEXT: s_endpgm 1456; 1457; HSA-LABEL: test_call_external_void_func_v3i64: 1458; HSA: ; %bb.0: 1459; HSA-NEXT: s_add_i32 s6, s6, s9 1460; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 1461; HSA-NEXT: s_mov_b32 s8, 0 1462; HSA-NEXT: s_add_u32 s0, s0, s9 1463; HSA-NEXT: s_mov_b32 s11, 0x1100f000 1464; HSA-NEXT: s_mov_b32 s10, -1 1465; HSA-NEXT: s_mov_b32 s9, s8 1466; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1467; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 1468; HSA-NEXT: s_addc_u32 s1, s1, 0 1469; HSA-NEXT: v_mov_b32_e32 v4, 1 1470; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 1471; HSA-NEXT: v_mov_b32_e32 v5, 2 1472; HSA-NEXT: s_mov_b32 s32, 0 1473; HSA-NEXT: s_getpc_b64 s[8:9] 1474; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i64@rel32@lo+4 1475; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i64@rel32@hi+12 1476; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 1477; HSA-NEXT: s_endpgm 1478 %load = load <2 x i64>, ptr addrspace(1) null 1479 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2> 1480 1481 call void @external_void_func_v3i64(<3 x i64> %val) 1482 ret void 1483} 1484 1485define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { 1486; VI-LABEL: test_call_external_void_func_v4i64: 1487; VI: ; %bb.0: 1488; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1489; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1490; VI-NEXT: s_mov_b32 s38, -1 1491; VI-NEXT: s_mov_b32 s39, 0xe80000 1492; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 1493; VI-NEXT: s_mov_b32 s0, 0 1494; VI-NEXT: s_add_u32 s36, s36, s3 1495; VI-NEXT: s_mov_b32 s3, 0xf000 1496; VI-NEXT: s_mov_b32 s2, -1 1497; VI-NEXT: s_mov_b32 s1, s0 1498; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 1499; VI-NEXT: s_addc_u32 s37, s37, 0 1500; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 1501; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 1502; VI-NEXT: v_mov_b32_e32 v4, 1 1503; VI-NEXT: v_mov_b32_e32 v5, 2 1504; VI-NEXT: v_mov_b32_e32 v6, 3 1505; VI-NEXT: v_mov_b32_e32 v7, 4 1506; VI-NEXT: s_mov_b32 s32, 0 1507; VI-NEXT: s_getpc_b64 s[4:5] 1508; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 1509; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 1510; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1511; VI-NEXT: s_endpgm 1512; 1513; CI-LABEL: test_call_external_void_func_v4i64: 1514; CI: ; %bb.0: 1515; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1516; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1517; CI-NEXT: s_mov_b32 s38, -1 1518; CI-NEXT: s_mov_b32 s39, 0xe8f000 1519; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 1520; CI-NEXT: s_mov_b32 s0, 0 1521; CI-NEXT: s_add_u32 s36, s36, s3 1522; CI-NEXT: s_mov_b32 s3, 0xf000 1523; CI-NEXT: s_mov_b32 s2, -1 1524; CI-NEXT: s_mov_b32 s1, s0 1525; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 1526; CI-NEXT: s_addc_u32 s37, s37, 0 1527; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 1528; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 1529; CI-NEXT: v_mov_b32_e32 v4, 1 1530; CI-NEXT: v_mov_b32_e32 v5, 2 1531; CI-NEXT: v_mov_b32_e32 v6, 3 1532; CI-NEXT: v_mov_b32_e32 v7, 4 1533; CI-NEXT: s_mov_b32 s32, 0 1534; CI-NEXT: s_getpc_b64 s[4:5] 1535; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 1536; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 1537; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1538; CI-NEXT: s_endpgm 1539; 1540; GFX9-LABEL: test_call_external_void_func_v4i64: 1541; GFX9: ; %bb.0: 1542; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1543; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1544; GFX9-NEXT: s_mov_b32 s38, -1 1545; GFX9-NEXT: s_mov_b32 s39, 0xe00000 1546; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 1547; GFX9-NEXT: s_mov_b32 s0, 0 1548; GFX9-NEXT: s_add_u32 s36, s36, s3 1549; GFX9-NEXT: s_mov_b32 s3, 0xf000 1550; GFX9-NEXT: s_mov_b32 s2, -1 1551; GFX9-NEXT: s_mov_b32 s1, s0 1552; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 1553; GFX9-NEXT: s_addc_u32 s37, s37, 0 1554; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 1555; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 1556; GFX9-NEXT: v_mov_b32_e32 v4, 1 1557; GFX9-NEXT: v_mov_b32_e32 v5, 2 1558; GFX9-NEXT: v_mov_b32_e32 v6, 3 1559; GFX9-NEXT: v_mov_b32_e32 v7, 4 1560; GFX9-NEXT: s_mov_b32 s32, 0 1561; GFX9-NEXT: s_getpc_b64 s[4:5] 1562; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4 1563; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12 1564; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 1565; GFX9-NEXT: s_endpgm 1566; 1567; GFX11-LABEL: test_call_external_void_func_v4i64: 1568; GFX11: ; %bb.0: 1569; GFX11-NEXT: s_mov_b32 s4, 0 1570; GFX11-NEXT: s_mov_b32 s7, 0x31016000 1571; GFX11-NEXT: s_mov_b32 s6, -1 1572; GFX11-NEXT: s_mov_b32 s5, s4 1573; GFX11-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2 1574; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 1575; GFX11-NEXT: v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4 1576; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 1577; GFX11-NEXT: s_mov_b32 s32, 0 1578; GFX11-NEXT: s_getpc_b64 s[2:3] 1579; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i64@rel32@lo+4 1580; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i64@rel32@hi+12 1581; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1582; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1583; GFX11-NEXT: s_endpgm 1584; 1585; HSA-LABEL: test_call_external_void_func_v4i64: 1586; HSA: ; %bb.0: 1587; HSA-NEXT: s_add_i32 s6, s6, s9 1588; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 1589; HSA-NEXT: s_mov_b32 s8, 0 1590; HSA-NEXT: s_add_u32 s0, s0, s9 1591; HSA-NEXT: s_mov_b32 s11, 0x1100f000 1592; HSA-NEXT: s_mov_b32 s10, -1 1593; HSA-NEXT: s_mov_b32 s9, s8 1594; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1595; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 1596; HSA-NEXT: s_addc_u32 s1, s1, 0 1597; HSA-NEXT: v_mov_b32_e32 v4, 1 1598; HSA-NEXT: v_mov_b32_e32 v5, 2 1599; HSA-NEXT: v_mov_b32_e32 v6, 3 1600; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 1601; HSA-NEXT: v_mov_b32_e32 v7, 4 1602; HSA-NEXT: s_mov_b32 s32, 0 1603; HSA-NEXT: s_getpc_b64 s[8:9] 1604; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i64@rel32@lo+4 1605; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i64@rel32@hi+12 1606; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 1607; HSA-NEXT: s_endpgm 1608 %load = load <2 x i64>, ptr addrspace(1) null 1609 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1610 call void @external_void_func_v4i64(<4 x i64> %val) 1611 ret void 1612} 1613 1614define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { 1615; VI-LABEL: test_call_external_void_func_f16_imm: 1616; VI: ; %bb.0: 1617; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1618; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1619; VI-NEXT: s_mov_b32 s38, -1 1620; VI-NEXT: s_mov_b32 s39, 0xe80000 1621; VI-NEXT: s_add_u32 s36, s36, s3 1622; VI-NEXT: s_addc_u32 s37, s37, 0 1623; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 1624; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 1625; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 1626; VI-NEXT: v_mov_b32_e32 v0, 0x4400 1627; VI-NEXT: s_mov_b32 s32, 0 1628; VI-NEXT: s_getpc_b64 s[4:5] 1629; VI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 1630; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 1631; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1632; VI-NEXT: s_endpgm 1633; 1634; CI-LABEL: test_call_external_void_func_f16_imm: 1635; CI: ; %bb.0: 1636; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1637; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1638; CI-NEXT: s_mov_b32 s38, -1 1639; CI-NEXT: s_mov_b32 s39, 0xe8f000 1640; CI-NEXT: s_add_u32 s36, s36, s3 1641; CI-NEXT: s_addc_u32 s37, s37, 0 1642; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 1643; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 1644; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 1645; CI-NEXT: v_mov_b32_e32 v0, 4.0 1646; CI-NEXT: s_mov_b32 s32, 0 1647; CI-NEXT: s_getpc_b64 s[4:5] 1648; CI-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 1649; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 1650; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1651; CI-NEXT: s_endpgm 1652; 1653; GFX9-LABEL: test_call_external_void_func_f16_imm: 1654; GFX9: ; %bb.0: 1655; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1656; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1657; GFX9-NEXT: s_mov_b32 s38, -1 1658; GFX9-NEXT: s_mov_b32 s39, 0xe00000 1659; GFX9-NEXT: s_add_u32 s36, s36, s3 1660; GFX9-NEXT: s_addc_u32 s37, s37, 0 1661; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 1662; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 1663; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 1664; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 1665; GFX9-NEXT: s_mov_b32 s32, 0 1666; GFX9-NEXT: s_getpc_b64 s[4:5] 1667; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4 1668; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12 1669; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 1670; GFX9-NEXT: s_endpgm 1671; 1672; GFX11-LABEL: test_call_external_void_func_f16_imm: 1673; GFX11: ; %bb.0: 1674; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400 1675; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 1676; GFX11-NEXT: s_mov_b32 s32, 0 1677; GFX11-NEXT: s_getpc_b64 s[2:3] 1678; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4 1679; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12 1680; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1681; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1682; GFX11-NEXT: s_endpgm 1683; 1684; HSA-LABEL: test_call_external_void_func_f16_imm: 1685; HSA: ; %bb.0: 1686; HSA-NEXT: s_add_i32 s6, s6, s9 1687; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 1688; HSA-NEXT: s_add_u32 s0, s0, s9 1689; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 1690; HSA-NEXT: s_addc_u32 s1, s1, 0 1691; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 1692; HSA-NEXT: v_mov_b32_e32 v0, 0x4400 1693; HSA-NEXT: s_mov_b32 s32, 0 1694; HSA-NEXT: s_getpc_b64 s[8:9] 1695; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f16@rel32@lo+4 1696; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f16@rel32@hi+12 1697; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 1698; HSA-NEXT: s_endpgm 1699 call void @external_void_func_f16(half 4.0) 1700 ret void 1701} 1702 1703define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { 1704; VI-LABEL: test_call_external_void_func_f32_imm: 1705; VI: ; %bb.0: 1706; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1707; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1708; VI-NEXT: s_mov_b32 s38, -1 1709; VI-NEXT: s_mov_b32 s39, 0xe80000 1710; VI-NEXT: s_add_u32 s36, s36, s3 1711; VI-NEXT: s_addc_u32 s37, s37, 0 1712; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 1713; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 1714; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 1715; VI-NEXT: v_mov_b32_e32 v0, 4.0 1716; VI-NEXT: s_mov_b32 s32, 0 1717; VI-NEXT: s_getpc_b64 s[4:5] 1718; VI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 1719; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 1720; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1721; VI-NEXT: s_endpgm 1722; 1723; CI-LABEL: test_call_external_void_func_f32_imm: 1724; CI: ; %bb.0: 1725; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1726; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1727; CI-NEXT: s_mov_b32 s38, -1 1728; CI-NEXT: s_mov_b32 s39, 0xe8f000 1729; CI-NEXT: s_add_u32 s36, s36, s3 1730; CI-NEXT: s_addc_u32 s37, s37, 0 1731; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 1732; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 1733; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 1734; CI-NEXT: v_mov_b32_e32 v0, 4.0 1735; CI-NEXT: s_mov_b32 s32, 0 1736; CI-NEXT: s_getpc_b64 s[4:5] 1737; CI-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 1738; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 1739; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1740; CI-NEXT: s_endpgm 1741; 1742; GFX9-LABEL: test_call_external_void_func_f32_imm: 1743; GFX9: ; %bb.0: 1744; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1745; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1746; GFX9-NEXT: s_mov_b32 s38, -1 1747; GFX9-NEXT: s_mov_b32 s39, 0xe00000 1748; GFX9-NEXT: s_add_u32 s36, s36, s3 1749; GFX9-NEXT: s_addc_u32 s37, s37, 0 1750; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 1751; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 1752; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 1753; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 1754; GFX9-NEXT: s_mov_b32 s32, 0 1755; GFX9-NEXT: s_getpc_b64 s[4:5] 1756; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4 1757; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12 1758; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 1759; GFX9-NEXT: s_endpgm 1760; 1761; GFX11-LABEL: test_call_external_void_func_f32_imm: 1762; GFX11: ; %bb.0: 1763; GFX11-NEXT: v_mov_b32_e32 v0, 4.0 1764; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 1765; GFX11-NEXT: s_mov_b32 s32, 0 1766; GFX11-NEXT: s_getpc_b64 s[2:3] 1767; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f32@rel32@lo+4 1768; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f32@rel32@hi+12 1769; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1770; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1771; GFX11-NEXT: s_endpgm 1772; 1773; HSA-LABEL: test_call_external_void_func_f32_imm: 1774; HSA: ; %bb.0: 1775; HSA-NEXT: s_add_i32 s6, s6, s9 1776; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 1777; HSA-NEXT: s_add_u32 s0, s0, s9 1778; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 1779; HSA-NEXT: s_addc_u32 s1, s1, 0 1780; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 1781; HSA-NEXT: v_mov_b32_e32 v0, 4.0 1782; HSA-NEXT: s_mov_b32 s32, 0 1783; HSA-NEXT: s_getpc_b64 s[8:9] 1784; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f32@rel32@lo+4 1785; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f32@rel32@hi+12 1786; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 1787; HSA-NEXT: s_endpgm 1788 call void @external_void_func_f32(float 4.0) 1789 ret void 1790} 1791 1792define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { 1793; VI-LABEL: test_call_external_void_func_v2f32_imm: 1794; VI: ; %bb.0: 1795; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1796; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1797; VI-NEXT: s_mov_b32 s38, -1 1798; VI-NEXT: s_mov_b32 s39, 0xe80000 1799; VI-NEXT: s_add_u32 s36, s36, s3 1800; VI-NEXT: s_addc_u32 s37, s37, 0 1801; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 1802; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 1803; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 1804; VI-NEXT: v_mov_b32_e32 v0, 1.0 1805; VI-NEXT: v_mov_b32_e32 v1, 2.0 1806; VI-NEXT: s_mov_b32 s32, 0 1807; VI-NEXT: s_getpc_b64 s[4:5] 1808; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 1809; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 1810; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1811; VI-NEXT: s_endpgm 1812; 1813; CI-LABEL: test_call_external_void_func_v2f32_imm: 1814; CI: ; %bb.0: 1815; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1816; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1817; CI-NEXT: s_mov_b32 s38, -1 1818; CI-NEXT: s_mov_b32 s39, 0xe8f000 1819; CI-NEXT: s_add_u32 s36, s36, s3 1820; CI-NEXT: s_addc_u32 s37, s37, 0 1821; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 1822; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 1823; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 1824; CI-NEXT: v_mov_b32_e32 v0, 1.0 1825; CI-NEXT: v_mov_b32_e32 v1, 2.0 1826; CI-NEXT: s_mov_b32 s32, 0 1827; CI-NEXT: s_getpc_b64 s[4:5] 1828; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 1829; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 1830; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1831; CI-NEXT: s_endpgm 1832; 1833; GFX9-LABEL: test_call_external_void_func_v2f32_imm: 1834; GFX9: ; %bb.0: 1835; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1836; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1837; GFX9-NEXT: s_mov_b32 s38, -1 1838; GFX9-NEXT: s_mov_b32 s39, 0xe00000 1839; GFX9-NEXT: s_add_u32 s36, s36, s3 1840; GFX9-NEXT: s_addc_u32 s37, s37, 0 1841; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 1842; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 1843; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 1844; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 1845; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 1846; GFX9-NEXT: s_mov_b32 s32, 0 1847; GFX9-NEXT: s_getpc_b64 s[4:5] 1848; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4 1849; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12 1850; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 1851; GFX9-NEXT: s_endpgm 1852; 1853; GFX11-LABEL: test_call_external_void_func_v2f32_imm: 1854; GFX11: ; %bb.0: 1855; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 1856; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 1857; GFX11-NEXT: s_mov_b32 s32, 0 1858; GFX11-NEXT: s_getpc_b64 s[2:3] 1859; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f32@rel32@lo+4 1860; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f32@rel32@hi+12 1861; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1862; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1863; GFX11-NEXT: s_endpgm 1864; 1865; HSA-LABEL: test_call_external_void_func_v2f32_imm: 1866; HSA: ; %bb.0: 1867; HSA-NEXT: s_add_i32 s6, s6, s9 1868; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 1869; HSA-NEXT: s_add_u32 s0, s0, s9 1870; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 1871; HSA-NEXT: s_addc_u32 s1, s1, 0 1872; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 1873; HSA-NEXT: v_mov_b32_e32 v0, 1.0 1874; HSA-NEXT: v_mov_b32_e32 v1, 2.0 1875; HSA-NEXT: s_mov_b32 s32, 0 1876; HSA-NEXT: s_getpc_b64 s[8:9] 1877; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f32@rel32@lo+4 1878; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f32@rel32@hi+12 1879; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 1880; HSA-NEXT: s_endpgm 1881 call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>) 1882 ret void 1883} 1884 1885define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 { 1886; VI-LABEL: test_call_external_void_func_v3f32_imm: 1887; VI: ; %bb.0: 1888; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1889; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1890; VI-NEXT: s_mov_b32 s38, -1 1891; VI-NEXT: s_mov_b32 s39, 0xe80000 1892; VI-NEXT: s_add_u32 s36, s36, s3 1893; VI-NEXT: s_addc_u32 s37, s37, 0 1894; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 1895; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 1896; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 1897; VI-NEXT: v_mov_b32_e32 v0, 1.0 1898; VI-NEXT: v_mov_b32_e32 v1, 2.0 1899; VI-NEXT: v_mov_b32_e32 v2, 4.0 1900; VI-NEXT: s_mov_b32 s32, 0 1901; VI-NEXT: s_getpc_b64 s[4:5] 1902; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 1903; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 1904; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1905; VI-NEXT: s_endpgm 1906; 1907; CI-LABEL: test_call_external_void_func_v3f32_imm: 1908; CI: ; %bb.0: 1909; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1910; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1911; CI-NEXT: s_mov_b32 s38, -1 1912; CI-NEXT: s_mov_b32 s39, 0xe8f000 1913; CI-NEXT: s_add_u32 s36, s36, s3 1914; CI-NEXT: s_addc_u32 s37, s37, 0 1915; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 1916; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 1917; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 1918; CI-NEXT: v_mov_b32_e32 v0, 1.0 1919; CI-NEXT: v_mov_b32_e32 v1, 2.0 1920; CI-NEXT: v_mov_b32_e32 v2, 4.0 1921; CI-NEXT: s_mov_b32 s32, 0 1922; CI-NEXT: s_getpc_b64 s[4:5] 1923; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 1924; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 1925; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 1926; CI-NEXT: s_endpgm 1927; 1928; GFX9-LABEL: test_call_external_void_func_v3f32_imm: 1929; GFX9: ; %bb.0: 1930; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1931; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1932; GFX9-NEXT: s_mov_b32 s38, -1 1933; GFX9-NEXT: s_mov_b32 s39, 0xe00000 1934; GFX9-NEXT: s_add_u32 s36, s36, s3 1935; GFX9-NEXT: s_addc_u32 s37, s37, 0 1936; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 1937; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 1938; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 1939; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 1940; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 1941; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 1942; GFX9-NEXT: s_mov_b32 s32, 0 1943; GFX9-NEXT: s_getpc_b64 s[4:5] 1944; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4 1945; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12 1946; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 1947; GFX9-NEXT: s_endpgm 1948; 1949; GFX11-LABEL: test_call_external_void_func_v3f32_imm: 1950; GFX11: ; %bb.0: 1951; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 1952; GFX11-NEXT: v_mov_b32_e32 v2, 4.0 1953; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 1954; GFX11-NEXT: s_mov_b32 s32, 0 1955; GFX11-NEXT: s_getpc_b64 s[2:3] 1956; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f32@rel32@lo+4 1957; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f32@rel32@hi+12 1958; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1959; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 1960; GFX11-NEXT: s_endpgm 1961; 1962; HSA-LABEL: test_call_external_void_func_v3f32_imm: 1963; HSA: ; %bb.0: 1964; HSA-NEXT: s_add_i32 s6, s6, s9 1965; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 1966; HSA-NEXT: s_add_u32 s0, s0, s9 1967; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 1968; HSA-NEXT: s_addc_u32 s1, s1, 0 1969; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 1970; HSA-NEXT: v_mov_b32_e32 v0, 1.0 1971; HSA-NEXT: v_mov_b32_e32 v1, 2.0 1972; HSA-NEXT: v_mov_b32_e32 v2, 4.0 1973; HSA-NEXT: s_mov_b32 s32, 0 1974; HSA-NEXT: s_getpc_b64 s[8:9] 1975; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f32@rel32@lo+4 1976; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f32@rel32@hi+12 1977; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 1978; HSA-NEXT: s_endpgm 1979 call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>) 1980 ret void 1981} 1982 1983define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { 1984; VI-LABEL: test_call_external_void_func_v5f32_imm: 1985; VI: ; %bb.0: 1986; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 1987; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 1988; VI-NEXT: s_mov_b32 s38, -1 1989; VI-NEXT: s_mov_b32 s39, 0xe80000 1990; VI-NEXT: s_add_u32 s36, s36, s3 1991; VI-NEXT: s_addc_u32 s37, s37, 0 1992; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 1993; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 1994; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 1995; VI-NEXT: v_mov_b32_e32 v0, 1.0 1996; VI-NEXT: v_mov_b32_e32 v1, 2.0 1997; VI-NEXT: v_mov_b32_e32 v2, 4.0 1998; VI-NEXT: v_mov_b32_e32 v3, -1.0 1999; VI-NEXT: v_mov_b32_e32 v4, 0.5 2000; VI-NEXT: s_mov_b32 s32, 0 2001; VI-NEXT: s_getpc_b64 s[4:5] 2002; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 2003; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 2004; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2005; VI-NEXT: s_endpgm 2006; 2007; CI-LABEL: test_call_external_void_func_v5f32_imm: 2008; CI: ; %bb.0: 2009; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2010; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2011; CI-NEXT: s_mov_b32 s38, -1 2012; CI-NEXT: s_mov_b32 s39, 0xe8f000 2013; CI-NEXT: s_add_u32 s36, s36, s3 2014; CI-NEXT: s_addc_u32 s37, s37, 0 2015; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 2016; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 2017; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 2018; CI-NEXT: v_mov_b32_e32 v0, 1.0 2019; CI-NEXT: v_mov_b32_e32 v1, 2.0 2020; CI-NEXT: v_mov_b32_e32 v2, 4.0 2021; CI-NEXT: v_mov_b32_e32 v3, -1.0 2022; CI-NEXT: v_mov_b32_e32 v4, 0.5 2023; CI-NEXT: s_mov_b32 s32, 0 2024; CI-NEXT: s_getpc_b64 s[4:5] 2025; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 2026; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 2027; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2028; CI-NEXT: s_endpgm 2029; 2030; GFX9-LABEL: test_call_external_void_func_v5f32_imm: 2031; GFX9: ; %bb.0: 2032; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2033; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2034; GFX9-NEXT: s_mov_b32 s38, -1 2035; GFX9-NEXT: s_mov_b32 s39, 0xe00000 2036; GFX9-NEXT: s_add_u32 s36, s36, s3 2037; GFX9-NEXT: s_addc_u32 s37, s37, 0 2038; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 2039; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 2040; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 2041; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 2042; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 2043; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 2044; GFX9-NEXT: v_mov_b32_e32 v3, -1.0 2045; GFX9-NEXT: v_mov_b32_e32 v4, 0.5 2046; GFX9-NEXT: s_mov_b32 s32, 0 2047; GFX9-NEXT: s_getpc_b64 s[4:5] 2048; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4 2049; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12 2050; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 2051; GFX9-NEXT: s_endpgm 2052; 2053; GFX11-LABEL: test_call_external_void_func_v5f32_imm: 2054; GFX11: ; %bb.0: 2055; GFX11-NEXT: v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0 2056; GFX11-NEXT: v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0 2057; GFX11-NEXT: v_mov_b32_e32 v4, 0.5 2058; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 2059; GFX11-NEXT: s_mov_b32 s32, 0 2060; GFX11-NEXT: s_getpc_b64 s[2:3] 2061; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v5f32@rel32@lo+4 2062; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v5f32@rel32@hi+12 2063; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2064; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 2065; GFX11-NEXT: s_endpgm 2066; 2067; HSA-LABEL: test_call_external_void_func_v5f32_imm: 2068; HSA: ; %bb.0: 2069; HSA-NEXT: s_add_i32 s6, s6, s9 2070; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 2071; HSA-NEXT: s_add_u32 s0, s0, s9 2072; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 2073; HSA-NEXT: s_addc_u32 s1, s1, 0 2074; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 2075; HSA-NEXT: v_mov_b32_e32 v0, 1.0 2076; HSA-NEXT: v_mov_b32_e32 v1, 2.0 2077; HSA-NEXT: v_mov_b32_e32 v2, 4.0 2078; HSA-NEXT: v_mov_b32_e32 v3, -1.0 2079; HSA-NEXT: v_mov_b32_e32 v4, 0.5 2080; HSA-NEXT: s_mov_b32 s32, 0 2081; HSA-NEXT: s_getpc_b64 s[8:9] 2082; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v5f32@rel32@lo+4 2083; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v5f32@rel32@hi+12 2084; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 2085; HSA-NEXT: s_endpgm 2086 call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>) 2087 ret void 2088} 2089 2090define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { 2091; VI-LABEL: test_call_external_void_func_f64_imm: 2092; VI: ; %bb.0: 2093; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2094; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2095; VI-NEXT: s_mov_b32 s38, -1 2096; VI-NEXT: s_mov_b32 s39, 0xe80000 2097; VI-NEXT: s_add_u32 s36, s36, s3 2098; VI-NEXT: s_addc_u32 s37, s37, 0 2099; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 2100; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 2101; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 2102; VI-NEXT: v_mov_b32_e32 v0, 0 2103; VI-NEXT: v_mov_b32_e32 v1, 0x40100000 2104; VI-NEXT: s_mov_b32 s32, 0 2105; VI-NEXT: s_getpc_b64 s[4:5] 2106; VI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 2107; VI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 2108; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2109; VI-NEXT: s_endpgm 2110; 2111; CI-LABEL: test_call_external_void_func_f64_imm: 2112; CI: ; %bb.0: 2113; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2114; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2115; CI-NEXT: s_mov_b32 s38, -1 2116; CI-NEXT: s_mov_b32 s39, 0xe8f000 2117; CI-NEXT: s_add_u32 s36, s36, s3 2118; CI-NEXT: s_addc_u32 s37, s37, 0 2119; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 2120; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 2121; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 2122; CI-NEXT: v_mov_b32_e32 v0, 0 2123; CI-NEXT: v_mov_b32_e32 v1, 0x40100000 2124; CI-NEXT: s_mov_b32 s32, 0 2125; CI-NEXT: s_getpc_b64 s[4:5] 2126; CI-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 2127; CI-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 2128; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2129; CI-NEXT: s_endpgm 2130; 2131; GFX9-LABEL: test_call_external_void_func_f64_imm: 2132; GFX9: ; %bb.0: 2133; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2134; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2135; GFX9-NEXT: s_mov_b32 s38, -1 2136; GFX9-NEXT: s_mov_b32 s39, 0xe00000 2137; GFX9-NEXT: s_add_u32 s36, s36, s3 2138; GFX9-NEXT: s_addc_u32 s37, s37, 0 2139; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 2140; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 2141; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 2142; GFX9-NEXT: v_mov_b32_e32 v0, 0 2143; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 2144; GFX9-NEXT: s_mov_b32 s32, 0 2145; GFX9-NEXT: s_getpc_b64 s[4:5] 2146; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4 2147; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12 2148; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 2149; GFX9-NEXT: s_endpgm 2150; 2151; GFX11-LABEL: test_call_external_void_func_f64_imm: 2152; GFX11: ; %bb.0: 2153; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000 2154; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 2155; GFX11-NEXT: s_mov_b32 s32, 0 2156; GFX11-NEXT: s_getpc_b64 s[2:3] 2157; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_f64@rel32@lo+4 2158; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_f64@rel32@hi+12 2159; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2160; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 2161; GFX11-NEXT: s_endpgm 2162; 2163; HSA-LABEL: test_call_external_void_func_f64_imm: 2164; HSA: ; %bb.0: 2165; HSA-NEXT: s_add_i32 s6, s6, s9 2166; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 2167; HSA-NEXT: s_add_u32 s0, s0, s9 2168; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 2169; HSA-NEXT: s_addc_u32 s1, s1, 0 2170; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 2171; HSA-NEXT: v_mov_b32_e32 v0, 0 2172; HSA-NEXT: v_mov_b32_e32 v1, 0x40100000 2173; HSA-NEXT: s_mov_b32 s32, 0 2174; HSA-NEXT: s_getpc_b64 s[8:9] 2175; HSA-NEXT: s_add_u32 s8, s8, external_void_func_f64@rel32@lo+4 2176; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_f64@rel32@hi+12 2177; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 2178; HSA-NEXT: s_endpgm 2179 call void @external_void_func_f64(double 4.0) 2180 ret void 2181} 2182 2183define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { 2184; VI-LABEL: test_call_external_void_func_v2f64_imm: 2185; VI: ; %bb.0: 2186; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2187; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2188; VI-NEXT: s_mov_b32 s38, -1 2189; VI-NEXT: s_mov_b32 s39, 0xe80000 2190; VI-NEXT: s_add_u32 s36, s36, s3 2191; VI-NEXT: s_addc_u32 s37, s37, 0 2192; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 2193; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 2194; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 2195; VI-NEXT: v_mov_b32_e32 v0, 0 2196; VI-NEXT: v_mov_b32_e32 v1, 2.0 2197; VI-NEXT: v_mov_b32_e32 v2, 0 2198; VI-NEXT: v_mov_b32_e32 v3, 0x40100000 2199; VI-NEXT: s_mov_b32 s32, 0 2200; VI-NEXT: s_getpc_b64 s[4:5] 2201; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 2202; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 2203; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2204; VI-NEXT: s_endpgm 2205; 2206; CI-LABEL: test_call_external_void_func_v2f64_imm: 2207; CI: ; %bb.0: 2208; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2209; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2210; CI-NEXT: s_mov_b32 s38, -1 2211; CI-NEXT: s_mov_b32 s39, 0xe8f000 2212; CI-NEXT: s_add_u32 s36, s36, s3 2213; CI-NEXT: s_addc_u32 s37, s37, 0 2214; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 2215; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 2216; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 2217; CI-NEXT: v_mov_b32_e32 v0, 0 2218; CI-NEXT: v_mov_b32_e32 v1, 2.0 2219; CI-NEXT: v_mov_b32_e32 v2, 0 2220; CI-NEXT: v_mov_b32_e32 v3, 0x40100000 2221; CI-NEXT: s_mov_b32 s32, 0 2222; CI-NEXT: s_getpc_b64 s[4:5] 2223; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 2224; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 2225; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2226; CI-NEXT: s_endpgm 2227; 2228; GFX9-LABEL: test_call_external_void_func_v2f64_imm: 2229; GFX9: ; %bb.0: 2230; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2231; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2232; GFX9-NEXT: s_mov_b32 s38, -1 2233; GFX9-NEXT: s_mov_b32 s39, 0xe00000 2234; GFX9-NEXT: s_add_u32 s36, s36, s3 2235; GFX9-NEXT: s_addc_u32 s37, s37, 0 2236; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 2237; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 2238; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 2239; GFX9-NEXT: v_mov_b32_e32 v0, 0 2240; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 2241; GFX9-NEXT: v_mov_b32_e32 v2, 0 2242; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 2243; GFX9-NEXT: s_mov_b32 s32, 0 2244; GFX9-NEXT: s_getpc_b64 s[4:5] 2245; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4 2246; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12 2247; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 2248; GFX9-NEXT: s_endpgm 2249; 2250; GFX11-LABEL: test_call_external_void_func_v2f64_imm: 2251; GFX11: ; %bb.0: 2252; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 2253; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 2254; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 2255; GFX11-NEXT: s_mov_b32 s32, 0 2256; GFX11-NEXT: s_getpc_b64 s[2:3] 2257; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f64@rel32@lo+4 2258; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f64@rel32@hi+12 2259; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2260; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 2261; GFX11-NEXT: s_endpgm 2262; 2263; HSA-LABEL: test_call_external_void_func_v2f64_imm: 2264; HSA: ; %bb.0: 2265; HSA-NEXT: s_add_i32 s6, s6, s9 2266; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 2267; HSA-NEXT: s_add_u32 s0, s0, s9 2268; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 2269; HSA-NEXT: s_addc_u32 s1, s1, 0 2270; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 2271; HSA-NEXT: v_mov_b32_e32 v0, 0 2272; HSA-NEXT: v_mov_b32_e32 v1, 2.0 2273; HSA-NEXT: v_mov_b32_e32 v2, 0 2274; HSA-NEXT: v_mov_b32_e32 v3, 0x40100000 2275; HSA-NEXT: s_mov_b32 s32, 0 2276; HSA-NEXT: s_getpc_b64 s[8:9] 2277; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f64@rel32@lo+4 2278; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f64@rel32@hi+12 2279; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 2280; HSA-NEXT: s_endpgm 2281 call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>) 2282 ret void 2283} 2284 2285define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { 2286; VI-LABEL: test_call_external_void_func_v3f64_imm: 2287; VI: ; %bb.0: 2288; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2289; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2290; VI-NEXT: s_mov_b32 s38, -1 2291; VI-NEXT: s_mov_b32 s39, 0xe80000 2292; VI-NEXT: s_add_u32 s36, s36, s3 2293; VI-NEXT: s_addc_u32 s37, s37, 0 2294; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 2295; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 2296; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 2297; VI-NEXT: v_mov_b32_e32 v0, 0 2298; VI-NEXT: v_mov_b32_e32 v1, 2.0 2299; VI-NEXT: v_mov_b32_e32 v2, 0 2300; VI-NEXT: v_mov_b32_e32 v3, 0x40100000 2301; VI-NEXT: v_mov_b32_e32 v4, 0 2302; VI-NEXT: v_mov_b32_e32 v5, 0x40200000 2303; VI-NEXT: s_mov_b32 s32, 0 2304; VI-NEXT: s_getpc_b64 s[4:5] 2305; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 2306; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 2307; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2308; VI-NEXT: s_endpgm 2309; 2310; CI-LABEL: test_call_external_void_func_v3f64_imm: 2311; CI: ; %bb.0: 2312; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2313; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2314; CI-NEXT: s_mov_b32 s38, -1 2315; CI-NEXT: s_mov_b32 s39, 0xe8f000 2316; CI-NEXT: s_add_u32 s36, s36, s3 2317; CI-NEXT: s_addc_u32 s37, s37, 0 2318; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 2319; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 2320; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 2321; CI-NEXT: v_mov_b32_e32 v0, 0 2322; CI-NEXT: v_mov_b32_e32 v1, 2.0 2323; CI-NEXT: v_mov_b32_e32 v2, 0 2324; CI-NEXT: v_mov_b32_e32 v3, 0x40100000 2325; CI-NEXT: v_mov_b32_e32 v4, 0 2326; CI-NEXT: v_mov_b32_e32 v5, 0x40200000 2327; CI-NEXT: s_mov_b32 s32, 0 2328; CI-NEXT: s_getpc_b64 s[4:5] 2329; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 2330; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 2331; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2332; CI-NEXT: s_endpgm 2333; 2334; GFX9-LABEL: test_call_external_void_func_v3f64_imm: 2335; GFX9: ; %bb.0: 2336; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2337; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2338; GFX9-NEXT: s_mov_b32 s38, -1 2339; GFX9-NEXT: s_mov_b32 s39, 0xe00000 2340; GFX9-NEXT: s_add_u32 s36, s36, s3 2341; GFX9-NEXT: s_addc_u32 s37, s37, 0 2342; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 2343; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 2344; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 2345; GFX9-NEXT: v_mov_b32_e32 v0, 0 2346; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 2347; GFX9-NEXT: v_mov_b32_e32 v2, 0 2348; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 2349; GFX9-NEXT: v_mov_b32_e32 v4, 0 2350; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000 2351; GFX9-NEXT: s_mov_b32 s32, 0 2352; GFX9-NEXT: s_getpc_b64 s[4:5] 2353; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4 2354; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12 2355; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 2356; GFX9-NEXT: s_endpgm 2357; 2358; GFX11-LABEL: test_call_external_void_func_v3f64_imm: 2359; GFX11: ; %bb.0: 2360; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0 2361; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000 2362; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000 2363; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 2364; GFX11-NEXT: s_mov_b32 s32, 0 2365; GFX11-NEXT: s_getpc_b64 s[2:3] 2366; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f64@rel32@lo+4 2367; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f64@rel32@hi+12 2368; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2369; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 2370; GFX11-NEXT: s_endpgm 2371; 2372; HSA-LABEL: test_call_external_void_func_v3f64_imm: 2373; HSA: ; %bb.0: 2374; HSA-NEXT: s_add_i32 s6, s6, s9 2375; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 2376; HSA-NEXT: s_add_u32 s0, s0, s9 2377; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 2378; HSA-NEXT: s_addc_u32 s1, s1, 0 2379; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 2380; HSA-NEXT: v_mov_b32_e32 v0, 0 2381; HSA-NEXT: v_mov_b32_e32 v1, 2.0 2382; HSA-NEXT: v_mov_b32_e32 v2, 0 2383; HSA-NEXT: v_mov_b32_e32 v3, 0x40100000 2384; HSA-NEXT: v_mov_b32_e32 v4, 0 2385; HSA-NEXT: v_mov_b32_e32 v5, 0x40200000 2386; HSA-NEXT: s_mov_b32 s32, 0 2387; HSA-NEXT: s_getpc_b64 s[8:9] 2388; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f64@rel32@lo+4 2389; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f64@rel32@hi+12 2390; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 2391; HSA-NEXT: s_endpgm 2392 call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>) 2393 ret void 2394} 2395 2396define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { 2397; VI-LABEL: test_call_external_void_func_v2i16: 2398; VI: ; %bb.0: 2399; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2400; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2401; VI-NEXT: s_mov_b32 s38, -1 2402; VI-NEXT: s_mov_b32 s39, 0xe80000 2403; VI-NEXT: s_add_u32 s36, s36, s3 2404; VI-NEXT: s_mov_b32 s3, 0xf000 2405; VI-NEXT: s_mov_b32 s2, -1 2406; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 2407; VI-NEXT: s_addc_u32 s37, s37, 0 2408; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 2409; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 2410; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 2411; VI-NEXT: s_mov_b32 s32, 0 2412; VI-NEXT: s_getpc_b64 s[4:5] 2413; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 2414; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 2415; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2416; VI-NEXT: s_endpgm 2417; 2418; CI-LABEL: test_call_external_void_func_v2i16: 2419; CI: ; %bb.0: 2420; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2421; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2422; CI-NEXT: s_mov_b32 s38, -1 2423; CI-NEXT: s_mov_b32 s39, 0xe8f000 2424; CI-NEXT: s_add_u32 s36, s36, s3 2425; CI-NEXT: s_mov_b32 s3, 0xf000 2426; CI-NEXT: s_mov_b32 s2, -1 2427; CI-NEXT: buffer_load_dword v0, off, s[0:3], 0 2428; CI-NEXT: s_addc_u32 s37, s37, 0 2429; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 2430; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 2431; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 2432; CI-NEXT: s_mov_b32 s32, 0 2433; CI-NEXT: s_getpc_b64 s[4:5] 2434; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 2435; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 2436; CI-NEXT: s_waitcnt vmcnt(0) 2437; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 2438; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2439; CI-NEXT: s_endpgm 2440; 2441; GFX9-LABEL: test_call_external_void_func_v2i16: 2442; GFX9: ; %bb.0: 2443; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2444; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2445; GFX9-NEXT: s_mov_b32 s38, -1 2446; GFX9-NEXT: s_mov_b32 s39, 0xe00000 2447; GFX9-NEXT: s_add_u32 s36, s36, s3 2448; GFX9-NEXT: s_mov_b32 s3, 0xf000 2449; GFX9-NEXT: s_mov_b32 s2, -1 2450; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0 2451; GFX9-NEXT: s_addc_u32 s37, s37, 0 2452; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 2453; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 2454; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 2455; GFX9-NEXT: s_mov_b32 s32, 0 2456; GFX9-NEXT: s_getpc_b64 s[4:5] 2457; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4 2458; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12 2459; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 2460; GFX9-NEXT: s_endpgm 2461; 2462; GFX11-LABEL: test_call_external_void_func_v2i16: 2463; GFX11: ; %bb.0: 2464; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2465; GFX11-NEXT: s_mov_b32 s2, -1 2466; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 2467; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 2468; GFX11-NEXT: s_mov_b32 s32, 0 2469; GFX11-NEXT: s_getpc_b64 s[2:3] 2470; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i16@rel32@lo+4 2471; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i16@rel32@hi+12 2472; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2473; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 2474; GFX11-NEXT: s_endpgm 2475; 2476; HSA-LABEL: test_call_external_void_func_v2i16: 2477; HSA: ; %bb.0: 2478; HSA-NEXT: s_add_i32 s6, s6, s9 2479; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 2480; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 2481; HSA-NEXT: s_mov_b32 s7, 0x1100f000 2482; HSA-NEXT: s_mov_b32 s6, -1 2483; HSA-NEXT: buffer_load_dword v0, off, s[4:7], 0 2484; HSA-NEXT: s_add_u32 s0, s0, s9 2485; HSA-NEXT: s_addc_u32 s1, s1, 0 2486; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 2487; HSA-NEXT: s_mov_b32 s32, 0 2488; HSA-NEXT: s_getpc_b64 s[8:9] 2489; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i16@rel32@lo+4 2490; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i16@rel32@hi+12 2491; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 2492; HSA-NEXT: s_endpgm 2493 %val = load <2 x i16>, ptr addrspace(1) undef 2494 call void @external_void_func_v2i16(<2 x i16> %val) 2495 ret void 2496} 2497 2498define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { 2499; VI-LABEL: test_call_external_void_func_v3i16: 2500; VI: ; %bb.0: 2501; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2502; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2503; VI-NEXT: s_mov_b32 s38, -1 2504; VI-NEXT: s_mov_b32 s39, 0xe80000 2505; VI-NEXT: s_add_u32 s36, s36, s3 2506; VI-NEXT: s_mov_b32 s3, 0xf000 2507; VI-NEXT: s_mov_b32 s2, -1 2508; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 2509; VI-NEXT: s_addc_u32 s37, s37, 0 2510; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 2511; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 2512; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 2513; VI-NEXT: s_mov_b32 s32, 0 2514; VI-NEXT: s_getpc_b64 s[4:5] 2515; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 2516; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 2517; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2518; VI-NEXT: s_endpgm 2519; 2520; CI-LABEL: test_call_external_void_func_v3i16: 2521; CI: ; %bb.0: 2522; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2523; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2524; CI-NEXT: s_mov_b32 s38, -1 2525; CI-NEXT: s_mov_b32 s39, 0xe8f000 2526; CI-NEXT: s_add_u32 s36, s36, s3 2527; CI-NEXT: s_mov_b32 s3, 0xf000 2528; CI-NEXT: s_mov_b32 s2, -1 2529; CI-NEXT: buffer_load_dwordx2 v[2:3], off, s[0:3], 0 2530; CI-NEXT: s_addc_u32 s37, s37, 0 2531; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 2532; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 2533; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 2534; CI-NEXT: s_mov_b32 s32, 0 2535; CI-NEXT: s_getpc_b64 s[4:5] 2536; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 2537; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 2538; CI-NEXT: s_waitcnt vmcnt(0) 2539; CI-NEXT: v_alignbit_b32 v1, v3, v2, 16 2540; CI-NEXT: v_mov_b32_e32 v0, v2 2541; CI-NEXT: v_mov_b32_e32 v2, v3 2542; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2543; CI-NEXT: s_endpgm 2544; 2545; GFX9-LABEL: test_call_external_void_func_v3i16: 2546; GFX9: ; %bb.0: 2547; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2548; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2549; GFX9-NEXT: s_mov_b32 s38, -1 2550; GFX9-NEXT: s_mov_b32 s39, 0xe00000 2551; GFX9-NEXT: s_add_u32 s36, s36, s3 2552; GFX9-NEXT: s_mov_b32 s3, 0xf000 2553; GFX9-NEXT: s_mov_b32 s2, -1 2554; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 2555; GFX9-NEXT: s_addc_u32 s37, s37, 0 2556; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 2557; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 2558; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 2559; GFX9-NEXT: s_mov_b32 s32, 0 2560; GFX9-NEXT: s_getpc_b64 s[4:5] 2561; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 2562; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 2563; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 2564; GFX9-NEXT: s_endpgm 2565; 2566; GFX11-LABEL: test_call_external_void_func_v3i16: 2567; GFX11: ; %bb.0: 2568; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2569; GFX11-NEXT: s_mov_b32 s2, -1 2570; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 2571; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 2572; GFX11-NEXT: s_mov_b32 s32, 0 2573; GFX11-NEXT: s_getpc_b64 s[2:3] 2574; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i16@rel32@lo+4 2575; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i16@rel32@hi+12 2576; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2577; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 2578; GFX11-NEXT: s_endpgm 2579; 2580; HSA-LABEL: test_call_external_void_func_v3i16: 2581; HSA: ; %bb.0: 2582; HSA-NEXT: s_add_i32 s6, s6, s9 2583; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 2584; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 2585; HSA-NEXT: s_mov_b32 s7, 0x1100f000 2586; HSA-NEXT: s_mov_b32 s6, -1 2587; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 2588; HSA-NEXT: s_add_u32 s0, s0, s9 2589; HSA-NEXT: s_addc_u32 s1, s1, 0 2590; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 2591; HSA-NEXT: s_mov_b32 s32, 0 2592; HSA-NEXT: s_getpc_b64 s[8:9] 2593; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i16@rel32@lo+4 2594; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i16@rel32@hi+12 2595; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 2596; HSA-NEXT: s_endpgm 2597 %val = load <3 x i16>, ptr addrspace(1) undef 2598 call void @external_void_func_v3i16(<3 x i16> %val) 2599 ret void 2600} 2601 2602define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { 2603; VI-LABEL: test_call_external_void_func_v3f16: 2604; VI: ; %bb.0: 2605; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2606; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2607; VI-NEXT: s_mov_b32 s38, -1 2608; VI-NEXT: s_mov_b32 s39, 0xe80000 2609; VI-NEXT: s_add_u32 s36, s36, s3 2610; VI-NEXT: s_mov_b32 s3, 0xf000 2611; VI-NEXT: s_mov_b32 s2, -1 2612; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 2613; VI-NEXT: s_addc_u32 s37, s37, 0 2614; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 2615; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 2616; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 2617; VI-NEXT: s_mov_b32 s32, 0 2618; VI-NEXT: s_getpc_b64 s[4:5] 2619; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 2620; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 2621; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2622; VI-NEXT: s_endpgm 2623; 2624; CI-LABEL: test_call_external_void_func_v3f16: 2625; CI: ; %bb.0: 2626; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2627; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2628; CI-NEXT: s_mov_b32 s38, -1 2629; CI-NEXT: s_mov_b32 s39, 0xe8f000 2630; CI-NEXT: s_add_u32 s36, s36, s3 2631; CI-NEXT: s_mov_b32 s3, 0xf000 2632; CI-NEXT: s_mov_b32 s2, -1 2633; CI-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0 2634; CI-NEXT: s_addc_u32 s37, s37, 0 2635; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 2636; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 2637; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 2638; CI-NEXT: s_mov_b32 s32, 0 2639; CI-NEXT: s_getpc_b64 s[4:5] 2640; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 2641; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 2642; CI-NEXT: s_waitcnt vmcnt(0) 2643; CI-NEXT: v_cvt_f32_f16_e32 v0, v1 2644; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 2645; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 2646; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 2647; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2648; CI-NEXT: s_endpgm 2649; 2650; GFX9-LABEL: test_call_external_void_func_v3f16: 2651; GFX9: ; %bb.0: 2652; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2653; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2654; GFX9-NEXT: s_mov_b32 s38, -1 2655; GFX9-NEXT: s_mov_b32 s39, 0xe00000 2656; GFX9-NEXT: s_add_u32 s36, s36, s3 2657; GFX9-NEXT: s_mov_b32 s3, 0xf000 2658; GFX9-NEXT: s_mov_b32 s2, -1 2659; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 2660; GFX9-NEXT: s_addc_u32 s37, s37, 0 2661; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 2662; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 2663; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 2664; GFX9-NEXT: s_mov_b32 s32, 0 2665; GFX9-NEXT: s_getpc_b64 s[4:5] 2666; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 2667; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 2668; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 2669; GFX9-NEXT: s_endpgm 2670; 2671; GFX11-LABEL: test_call_external_void_func_v3f16: 2672; GFX11: ; %bb.0: 2673; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2674; GFX11-NEXT: s_mov_b32 s2, -1 2675; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 2676; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 2677; GFX11-NEXT: s_mov_b32 s32, 0 2678; GFX11-NEXT: s_getpc_b64 s[2:3] 2679; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16@rel32@lo+4 2680; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16@rel32@hi+12 2681; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2682; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 2683; GFX11-NEXT: s_endpgm 2684; 2685; HSA-LABEL: test_call_external_void_func_v3f16: 2686; HSA: ; %bb.0: 2687; HSA-NEXT: s_add_i32 s6, s6, s9 2688; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 2689; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 2690; HSA-NEXT: s_mov_b32 s7, 0x1100f000 2691; HSA-NEXT: s_mov_b32 s6, -1 2692; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 2693; HSA-NEXT: s_add_u32 s0, s0, s9 2694; HSA-NEXT: s_addc_u32 s1, s1, 0 2695; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 2696; HSA-NEXT: s_mov_b32 s32, 0 2697; HSA-NEXT: s_getpc_b64 s[8:9] 2698; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f16@rel32@lo+4 2699; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f16@rel32@hi+12 2700; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 2701; HSA-NEXT: s_endpgm 2702 %val = load <3 x half>, ptr addrspace(1) undef 2703 call void @external_void_func_v3f16(<3 x half> %val) 2704 ret void 2705} 2706 2707define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { 2708; VI-LABEL: test_call_external_void_func_v3i16_imm: 2709; VI: ; %bb.0: 2710; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2711; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2712; VI-NEXT: s_mov_b32 s38, -1 2713; VI-NEXT: s_mov_b32 s39, 0xe80000 2714; VI-NEXT: s_add_u32 s36, s36, s3 2715; VI-NEXT: s_addc_u32 s37, s37, 0 2716; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 2717; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 2718; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 2719; VI-NEXT: v_mov_b32_e32 v0, 0x20001 2720; VI-NEXT: v_mov_b32_e32 v1, 3 2721; VI-NEXT: s_mov_b32 s32, 0 2722; VI-NEXT: s_getpc_b64 s[4:5] 2723; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 2724; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 2725; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2726; VI-NEXT: s_endpgm 2727; 2728; CI-LABEL: test_call_external_void_func_v3i16_imm: 2729; CI: ; %bb.0: 2730; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2731; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2732; CI-NEXT: s_mov_b32 s38, -1 2733; CI-NEXT: s_mov_b32 s39, 0xe8f000 2734; CI-NEXT: s_add_u32 s36, s36, s3 2735; CI-NEXT: s_addc_u32 s37, s37, 0 2736; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 2737; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 2738; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 2739; CI-NEXT: v_mov_b32_e32 v0, 1 2740; CI-NEXT: v_mov_b32_e32 v1, 2 2741; CI-NEXT: v_mov_b32_e32 v2, 3 2742; CI-NEXT: s_mov_b32 s32, 0 2743; CI-NEXT: s_getpc_b64 s[4:5] 2744; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 2745; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 2746; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2747; CI-NEXT: s_endpgm 2748; 2749; GFX9-LABEL: test_call_external_void_func_v3i16_imm: 2750; GFX9: ; %bb.0: 2751; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2752; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2753; GFX9-NEXT: s_mov_b32 s38, -1 2754; GFX9-NEXT: s_mov_b32 s39, 0xe00000 2755; GFX9-NEXT: s_add_u32 s36, s36, s3 2756; GFX9-NEXT: s_addc_u32 s37, s37, 0 2757; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 2758; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 2759; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 2760; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 2761; GFX9-NEXT: v_mov_b32_e32 v1, 3 2762; GFX9-NEXT: s_mov_b32 s32, 0 2763; GFX9-NEXT: s_getpc_b64 s[4:5] 2764; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4 2765; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12 2766; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 2767; GFX9-NEXT: s_endpgm 2768; 2769; GFX11-LABEL: test_call_external_void_func_v3i16_imm: 2770; GFX11: ; %bb.0: 2771; GFX11-NEXT: v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3 2772; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 2773; GFX11-NEXT: s_mov_b32 s32, 0 2774; GFX11-NEXT: s_getpc_b64 s[2:3] 2775; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i16@rel32@lo+4 2776; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i16@rel32@hi+12 2777; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2778; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 2779; GFX11-NEXT: s_endpgm 2780; 2781; HSA-LABEL: test_call_external_void_func_v3i16_imm: 2782; HSA: ; %bb.0: 2783; HSA-NEXT: s_add_i32 s6, s6, s9 2784; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 2785; HSA-NEXT: s_add_u32 s0, s0, s9 2786; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 2787; HSA-NEXT: s_addc_u32 s1, s1, 0 2788; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 2789; HSA-NEXT: v_mov_b32_e32 v0, 0x20001 2790; HSA-NEXT: v_mov_b32_e32 v1, 3 2791; HSA-NEXT: s_mov_b32 s32, 0 2792; HSA-NEXT: s_getpc_b64 s[8:9] 2793; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i16@rel32@lo+4 2794; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i16@rel32@hi+12 2795; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 2796; HSA-NEXT: s_endpgm 2797 call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>) 2798 ret void 2799} 2800 2801define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { 2802; VI-LABEL: test_call_external_void_func_v3f16_imm: 2803; VI: ; %bb.0: 2804; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2805; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2806; VI-NEXT: s_mov_b32 s38, -1 2807; VI-NEXT: s_mov_b32 s39, 0xe80000 2808; VI-NEXT: s_add_u32 s36, s36, s3 2809; VI-NEXT: s_addc_u32 s37, s37, 0 2810; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 2811; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 2812; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 2813; VI-NEXT: v_mov_b32_e32 v0, 0x40003c00 2814; VI-NEXT: v_mov_b32_e32 v1, 0x4400 2815; VI-NEXT: s_mov_b32 s32, 0 2816; VI-NEXT: s_getpc_b64 s[4:5] 2817; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 2818; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 2819; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2820; VI-NEXT: s_endpgm 2821; 2822; CI-LABEL: test_call_external_void_func_v3f16_imm: 2823; CI: ; %bb.0: 2824; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2825; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2826; CI-NEXT: s_mov_b32 s38, -1 2827; CI-NEXT: s_mov_b32 s39, 0xe8f000 2828; CI-NEXT: s_add_u32 s36, s36, s3 2829; CI-NEXT: s_addc_u32 s37, s37, 0 2830; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 2831; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 2832; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 2833; CI-NEXT: v_mov_b32_e32 v0, 1.0 2834; CI-NEXT: v_mov_b32_e32 v1, 2.0 2835; CI-NEXT: v_mov_b32_e32 v2, 4.0 2836; CI-NEXT: s_mov_b32 s32, 0 2837; CI-NEXT: s_getpc_b64 s[4:5] 2838; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 2839; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 2840; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2841; CI-NEXT: s_endpgm 2842; 2843; GFX9-LABEL: test_call_external_void_func_v3f16_imm: 2844; GFX9: ; %bb.0: 2845; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2846; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2847; GFX9-NEXT: s_mov_b32 s38, -1 2848; GFX9-NEXT: s_mov_b32 s39, 0xe00000 2849; GFX9-NEXT: s_add_u32 s36, s36, s3 2850; GFX9-NEXT: s_addc_u32 s37, s37, 0 2851; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 2852; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 2853; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 2854; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 2855; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 2856; GFX9-NEXT: s_mov_b32 s32, 0 2857; GFX9-NEXT: s_getpc_b64 s[4:5] 2858; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4 2859; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12 2860; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 2861; GFX9-NEXT: s_endpgm 2862; 2863; GFX11-LABEL: test_call_external_void_func_v3f16_imm: 2864; GFX11: ; %bb.0: 2865; GFX11-NEXT: v_mov_b32_e32 v0, 0x40003c00 2866; GFX11-NEXT: v_mov_b32_e32 v1, 0x4400 2867; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 2868; GFX11-NEXT: s_mov_b32 s32, 0 2869; GFX11-NEXT: s_getpc_b64 s[2:3] 2870; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3f16@rel32@lo+4 2871; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3f16@rel32@hi+12 2872; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2873; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 2874; GFX11-NEXT: s_endpgm 2875; 2876; HSA-LABEL: test_call_external_void_func_v3f16_imm: 2877; HSA: ; %bb.0: 2878; HSA-NEXT: s_add_i32 s6, s6, s9 2879; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 2880; HSA-NEXT: s_add_u32 s0, s0, s9 2881; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 2882; HSA-NEXT: s_addc_u32 s1, s1, 0 2883; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 2884; HSA-NEXT: v_mov_b32_e32 v0, 0x40003c00 2885; HSA-NEXT: v_mov_b32_e32 v1, 0x4400 2886; HSA-NEXT: s_mov_b32 s32, 0 2887; HSA-NEXT: s_getpc_b64 s[8:9] 2888; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3f16@rel32@lo+4 2889; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3f16@rel32@hi+12 2890; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 2891; HSA-NEXT: s_endpgm 2892 call void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>) 2893 ret void 2894} 2895 2896define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { 2897; VI-LABEL: test_call_external_void_func_v4i16: 2898; VI: ; %bb.0: 2899; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2900; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2901; VI-NEXT: s_mov_b32 s38, -1 2902; VI-NEXT: s_mov_b32 s39, 0xe80000 2903; VI-NEXT: s_add_u32 s36, s36, s3 2904; VI-NEXT: s_mov_b32 s3, 0xf000 2905; VI-NEXT: s_mov_b32 s2, -1 2906; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 2907; VI-NEXT: s_addc_u32 s37, s37, 0 2908; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 2909; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 2910; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 2911; VI-NEXT: s_mov_b32 s32, 0 2912; VI-NEXT: s_getpc_b64 s[4:5] 2913; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 2914; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 2915; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2916; VI-NEXT: s_endpgm 2917; 2918; CI-LABEL: test_call_external_void_func_v4i16: 2919; CI: ; %bb.0: 2920; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2921; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2922; CI-NEXT: s_mov_b32 s38, -1 2923; CI-NEXT: s_mov_b32 s39, 0xe8f000 2924; CI-NEXT: s_add_u32 s36, s36, s3 2925; CI-NEXT: s_mov_b32 s3, 0xf000 2926; CI-NEXT: s_mov_b32 s2, -1 2927; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 2928; CI-NEXT: s_addc_u32 s37, s37, 0 2929; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 2930; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 2931; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 2932; CI-NEXT: s_mov_b32 s32, 0 2933; CI-NEXT: s_getpc_b64 s[4:5] 2934; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 2935; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 2936; CI-NEXT: s_waitcnt vmcnt(0) 2937; CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0 2938; CI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 2939; CI-NEXT: v_mov_b32_e32 v2, v1 2940; CI-NEXT: v_mov_b32_e32 v1, v4 2941; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 2942; CI-NEXT: s_endpgm 2943; 2944; GFX9-LABEL: test_call_external_void_func_v4i16: 2945; GFX9: ; %bb.0: 2946; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 2947; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 2948; GFX9-NEXT: s_mov_b32 s38, -1 2949; GFX9-NEXT: s_mov_b32 s39, 0xe00000 2950; GFX9-NEXT: s_add_u32 s36, s36, s3 2951; GFX9-NEXT: s_mov_b32 s3, 0xf000 2952; GFX9-NEXT: s_mov_b32 s2, -1 2953; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 2954; GFX9-NEXT: s_addc_u32 s37, s37, 0 2955; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 2956; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 2957; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 2958; GFX9-NEXT: s_mov_b32 s32, 0 2959; GFX9-NEXT: s_getpc_b64 s[4:5] 2960; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 2961; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 2962; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 2963; GFX9-NEXT: s_endpgm 2964; 2965; GFX11-LABEL: test_call_external_void_func_v4i16: 2966; GFX11: ; %bb.0: 2967; GFX11-NEXT: s_mov_b32 s3, 0x31016000 2968; GFX11-NEXT: s_mov_b32 s2, -1 2969; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 2970; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 2971; GFX11-NEXT: s_mov_b32 s32, 0 2972; GFX11-NEXT: s_getpc_b64 s[2:3] 2973; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i16@rel32@lo+4 2974; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i16@rel32@hi+12 2975; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 2976; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 2977; GFX11-NEXT: s_endpgm 2978; 2979; HSA-LABEL: test_call_external_void_func_v4i16: 2980; HSA: ; %bb.0: 2981; HSA-NEXT: s_add_i32 s6, s6, s9 2982; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 2983; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 2984; HSA-NEXT: s_mov_b32 s7, 0x1100f000 2985; HSA-NEXT: s_mov_b32 s6, -1 2986; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 2987; HSA-NEXT: s_add_u32 s0, s0, s9 2988; HSA-NEXT: s_addc_u32 s1, s1, 0 2989; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 2990; HSA-NEXT: s_mov_b32 s32, 0 2991; HSA-NEXT: s_getpc_b64 s[8:9] 2992; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i16@rel32@lo+4 2993; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i16@rel32@hi+12 2994; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 2995; HSA-NEXT: s_endpgm 2996 %val = load <4 x i16>, ptr addrspace(1) undef 2997 call void @external_void_func_v4i16(<4 x i16> %val) 2998 ret void 2999} 3000 3001define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { 3002; VI-LABEL: test_call_external_void_func_v4i16_imm: 3003; VI: ; %bb.0: 3004; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3005; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3006; VI-NEXT: s_mov_b32 s38, -1 3007; VI-NEXT: s_mov_b32 s39, 0xe80000 3008; VI-NEXT: s_add_u32 s36, s36, s3 3009; VI-NEXT: s_addc_u32 s37, s37, 0 3010; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 3011; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 3012; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 3013; VI-NEXT: v_mov_b32_e32 v0, 0x20001 3014; VI-NEXT: v_mov_b32_e32 v1, 0x40003 3015; VI-NEXT: s_mov_b32 s32, 0 3016; VI-NEXT: s_getpc_b64 s[4:5] 3017; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 3018; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 3019; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3020; VI-NEXT: s_endpgm 3021; 3022; CI-LABEL: test_call_external_void_func_v4i16_imm: 3023; CI: ; %bb.0: 3024; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3025; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3026; CI-NEXT: s_mov_b32 s38, -1 3027; CI-NEXT: s_mov_b32 s39, 0xe8f000 3028; CI-NEXT: s_add_u32 s36, s36, s3 3029; CI-NEXT: s_addc_u32 s37, s37, 0 3030; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 3031; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 3032; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 3033; CI-NEXT: v_mov_b32_e32 v0, 1 3034; CI-NEXT: v_mov_b32_e32 v1, 2 3035; CI-NEXT: v_mov_b32_e32 v2, 3 3036; CI-NEXT: v_mov_b32_e32 v3, 4 3037; CI-NEXT: s_mov_b32 s32, 0 3038; CI-NEXT: s_getpc_b64 s[4:5] 3039; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 3040; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 3041; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3042; CI-NEXT: s_endpgm 3043; 3044; GFX9-LABEL: test_call_external_void_func_v4i16_imm: 3045; GFX9: ; %bb.0: 3046; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3047; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3048; GFX9-NEXT: s_mov_b32 s38, -1 3049; GFX9-NEXT: s_mov_b32 s39, 0xe00000 3050; GFX9-NEXT: s_add_u32 s36, s36, s3 3051; GFX9-NEXT: s_addc_u32 s37, s37, 0 3052; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 3053; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 3054; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 3055; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 3056; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 3057; GFX9-NEXT: s_mov_b32 s32, 0 3058; GFX9-NEXT: s_getpc_b64 s[4:5] 3059; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4 3060; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12 3061; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 3062; GFX9-NEXT: s_endpgm 3063; 3064; GFX11-LABEL: test_call_external_void_func_v4i16_imm: 3065; GFX11: ; %bb.0: 3066; GFX11-NEXT: v_mov_b32_e32 v0, 0x20001 3067; GFX11-NEXT: v_mov_b32_e32 v1, 0x40003 3068; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 3069; GFX11-NEXT: s_mov_b32 s32, 0 3070; GFX11-NEXT: s_getpc_b64 s[2:3] 3071; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i16@rel32@lo+4 3072; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i16@rel32@hi+12 3073; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3074; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 3075; GFX11-NEXT: s_endpgm 3076; 3077; HSA-LABEL: test_call_external_void_func_v4i16_imm: 3078; HSA: ; %bb.0: 3079; HSA-NEXT: s_add_i32 s6, s6, s9 3080; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 3081; HSA-NEXT: s_add_u32 s0, s0, s9 3082; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 3083; HSA-NEXT: s_addc_u32 s1, s1, 0 3084; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 3085; HSA-NEXT: v_mov_b32_e32 v0, 0x20001 3086; HSA-NEXT: v_mov_b32_e32 v1, 0x40003 3087; HSA-NEXT: s_mov_b32 s32, 0 3088; HSA-NEXT: s_getpc_b64 s[8:9] 3089; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i16@rel32@lo+4 3090; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i16@rel32@hi+12 3091; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 3092; HSA-NEXT: s_endpgm 3093 call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>) 3094 ret void 3095} 3096 3097define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { 3098; VI-LABEL: test_call_external_void_func_v2f16: 3099; VI: ; %bb.0: 3100; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3101; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3102; VI-NEXT: s_mov_b32 s38, -1 3103; VI-NEXT: s_mov_b32 s39, 0xe80000 3104; VI-NEXT: s_add_u32 s36, s36, s3 3105; VI-NEXT: s_mov_b32 s3, 0xf000 3106; VI-NEXT: s_mov_b32 s2, -1 3107; VI-NEXT: buffer_load_dword v0, off, s[0:3], 0 3108; VI-NEXT: s_addc_u32 s37, s37, 0 3109; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 3110; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 3111; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 3112; VI-NEXT: s_mov_b32 s32, 0 3113; VI-NEXT: s_getpc_b64 s[4:5] 3114; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 3115; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 3116; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3117; VI-NEXT: s_endpgm 3118; 3119; CI-LABEL: test_call_external_void_func_v2f16: 3120; CI: ; %bb.0: 3121; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3122; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3123; CI-NEXT: s_mov_b32 s38, -1 3124; CI-NEXT: s_mov_b32 s39, 0xe8f000 3125; CI-NEXT: s_add_u32 s36, s36, s3 3126; CI-NEXT: s_mov_b32 s3, 0xf000 3127; CI-NEXT: s_mov_b32 s2, -1 3128; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 3129; CI-NEXT: s_addc_u32 s37, s37, 0 3130; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 3131; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 3132; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 3133; CI-NEXT: s_mov_b32 s32, 0 3134; CI-NEXT: s_getpc_b64 s[4:5] 3135; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 3136; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 3137; CI-NEXT: s_waitcnt vmcnt(0) 3138; CI-NEXT: v_cvt_f32_f16_e32 v0, v1 3139; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3140; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 3141; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3142; CI-NEXT: s_endpgm 3143; 3144; GFX9-LABEL: test_call_external_void_func_v2f16: 3145; GFX9: ; %bb.0: 3146; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3147; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3148; GFX9-NEXT: s_mov_b32 s38, -1 3149; GFX9-NEXT: s_mov_b32 s39, 0xe00000 3150; GFX9-NEXT: s_add_u32 s36, s36, s3 3151; GFX9-NEXT: s_mov_b32 s3, 0xf000 3152; GFX9-NEXT: s_mov_b32 s2, -1 3153; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], 0 3154; GFX9-NEXT: s_addc_u32 s37, s37, 0 3155; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 3156; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 3157; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 3158; GFX9-NEXT: s_mov_b32 s32, 0 3159; GFX9-NEXT: s_getpc_b64 s[4:5] 3160; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4 3161; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12 3162; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 3163; GFX9-NEXT: s_endpgm 3164; 3165; GFX11-LABEL: test_call_external_void_func_v2f16: 3166; GFX11: ; %bb.0: 3167; GFX11-NEXT: s_mov_b32 s3, 0x31016000 3168; GFX11-NEXT: s_mov_b32 s2, -1 3169; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 3170; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 3171; GFX11-NEXT: s_mov_b32 s32, 0 3172; GFX11-NEXT: s_getpc_b64 s[2:3] 3173; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2f16@rel32@lo+4 3174; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2f16@rel32@hi+12 3175; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3176; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 3177; GFX11-NEXT: s_endpgm 3178; 3179; HSA-LABEL: test_call_external_void_func_v2f16: 3180; HSA: ; %bb.0: 3181; HSA-NEXT: s_add_i32 s6, s6, s9 3182; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 3183; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 3184; HSA-NEXT: s_mov_b32 s7, 0x1100f000 3185; HSA-NEXT: s_mov_b32 s6, -1 3186; HSA-NEXT: buffer_load_dword v0, off, s[4:7], 0 3187; HSA-NEXT: s_add_u32 s0, s0, s9 3188; HSA-NEXT: s_addc_u32 s1, s1, 0 3189; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 3190; HSA-NEXT: s_mov_b32 s32, 0 3191; HSA-NEXT: s_getpc_b64 s[8:9] 3192; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2f16@rel32@lo+4 3193; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2f16@rel32@hi+12 3194; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 3195; HSA-NEXT: s_endpgm 3196 %val = load <2 x half>, ptr addrspace(1) undef 3197 call void @external_void_func_v2f16(<2 x half> %val) 3198 ret void 3199} 3200 3201define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { 3202; VI-LABEL: test_call_external_void_func_v2i32: 3203; VI: ; %bb.0: 3204; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3205; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3206; VI-NEXT: s_mov_b32 s38, -1 3207; VI-NEXT: s_mov_b32 s39, 0xe80000 3208; VI-NEXT: s_add_u32 s36, s36, s3 3209; VI-NEXT: s_mov_b32 s3, 0xf000 3210; VI-NEXT: s_mov_b32 s2, -1 3211; VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 3212; VI-NEXT: s_addc_u32 s37, s37, 0 3213; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 3214; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 3215; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 3216; VI-NEXT: s_mov_b32 s32, 0 3217; VI-NEXT: s_getpc_b64 s[4:5] 3218; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 3219; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 3220; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3221; VI-NEXT: s_endpgm 3222; 3223; CI-LABEL: test_call_external_void_func_v2i32: 3224; CI: ; %bb.0: 3225; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3226; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3227; CI-NEXT: s_mov_b32 s38, -1 3228; CI-NEXT: s_mov_b32 s39, 0xe8f000 3229; CI-NEXT: s_add_u32 s36, s36, s3 3230; CI-NEXT: s_mov_b32 s3, 0xf000 3231; CI-NEXT: s_mov_b32 s2, -1 3232; CI-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 3233; CI-NEXT: s_addc_u32 s37, s37, 0 3234; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 3235; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 3236; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 3237; CI-NEXT: s_mov_b32 s32, 0 3238; CI-NEXT: s_getpc_b64 s[4:5] 3239; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 3240; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 3241; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3242; CI-NEXT: s_endpgm 3243; 3244; GFX9-LABEL: test_call_external_void_func_v2i32: 3245; GFX9: ; %bb.0: 3246; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3247; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3248; GFX9-NEXT: s_mov_b32 s38, -1 3249; GFX9-NEXT: s_mov_b32 s39, 0xe00000 3250; GFX9-NEXT: s_add_u32 s36, s36, s3 3251; GFX9-NEXT: s_mov_b32 s3, 0xf000 3252; GFX9-NEXT: s_mov_b32 s2, -1 3253; GFX9-NEXT: buffer_load_dwordx2 v[0:1], off, s[0:3], 0 3254; GFX9-NEXT: s_addc_u32 s37, s37, 0 3255; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 3256; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 3257; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 3258; GFX9-NEXT: s_mov_b32 s32, 0 3259; GFX9-NEXT: s_getpc_b64 s[4:5] 3260; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 3261; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 3262; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 3263; GFX9-NEXT: s_endpgm 3264; 3265; GFX11-LABEL: test_call_external_void_func_v2i32: 3266; GFX11: ; %bb.0: 3267; GFX11-NEXT: s_mov_b32 s3, 0x31016000 3268; GFX11-NEXT: s_mov_b32 s2, -1 3269; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 3270; GFX11-NEXT: buffer_load_b64 v[0:1], off, s[0:3], 0 3271; GFX11-NEXT: s_mov_b32 s32, 0 3272; GFX11-NEXT: s_getpc_b64 s[2:3] 3273; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i32@rel32@lo+4 3274; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i32@rel32@hi+12 3275; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3276; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 3277; GFX11-NEXT: s_endpgm 3278; 3279; HSA-LABEL: test_call_external_void_func_v2i32: 3280; HSA: ; %bb.0: 3281; HSA-NEXT: s_add_i32 s6, s6, s9 3282; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 3283; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 3284; HSA-NEXT: s_mov_b32 s7, 0x1100f000 3285; HSA-NEXT: s_mov_b32 s6, -1 3286; HSA-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 3287; HSA-NEXT: s_add_u32 s0, s0, s9 3288; HSA-NEXT: s_addc_u32 s1, s1, 0 3289; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 3290; HSA-NEXT: s_mov_b32 s32, 0 3291; HSA-NEXT: s_getpc_b64 s[8:9] 3292; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i32@rel32@lo+4 3293; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i32@rel32@hi+12 3294; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 3295; HSA-NEXT: s_endpgm 3296 %val = load <2 x i32>, ptr addrspace(1) undef 3297 call void @external_void_func_v2i32(<2 x i32> %val) 3298 ret void 3299} 3300 3301define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { 3302; VI-LABEL: test_call_external_void_func_v2i32_imm: 3303; VI: ; %bb.0: 3304; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3305; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3306; VI-NEXT: s_mov_b32 s38, -1 3307; VI-NEXT: s_mov_b32 s39, 0xe80000 3308; VI-NEXT: s_add_u32 s36, s36, s3 3309; VI-NEXT: s_addc_u32 s37, s37, 0 3310; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 3311; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 3312; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 3313; VI-NEXT: v_mov_b32_e32 v0, 1 3314; VI-NEXT: v_mov_b32_e32 v1, 2 3315; VI-NEXT: s_mov_b32 s32, 0 3316; VI-NEXT: s_getpc_b64 s[4:5] 3317; VI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 3318; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 3319; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3320; VI-NEXT: s_endpgm 3321; 3322; CI-LABEL: test_call_external_void_func_v2i32_imm: 3323; CI: ; %bb.0: 3324; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3325; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3326; CI-NEXT: s_mov_b32 s38, -1 3327; CI-NEXT: s_mov_b32 s39, 0xe8f000 3328; CI-NEXT: s_add_u32 s36, s36, s3 3329; CI-NEXT: s_addc_u32 s37, s37, 0 3330; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 3331; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 3332; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 3333; CI-NEXT: v_mov_b32_e32 v0, 1 3334; CI-NEXT: v_mov_b32_e32 v1, 2 3335; CI-NEXT: s_mov_b32 s32, 0 3336; CI-NEXT: s_getpc_b64 s[4:5] 3337; CI-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 3338; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 3339; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3340; CI-NEXT: s_endpgm 3341; 3342; GFX9-LABEL: test_call_external_void_func_v2i32_imm: 3343; GFX9: ; %bb.0: 3344; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3345; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3346; GFX9-NEXT: s_mov_b32 s38, -1 3347; GFX9-NEXT: s_mov_b32 s39, 0xe00000 3348; GFX9-NEXT: s_add_u32 s36, s36, s3 3349; GFX9-NEXT: s_addc_u32 s37, s37, 0 3350; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 3351; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 3352; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 3353; GFX9-NEXT: v_mov_b32_e32 v0, 1 3354; GFX9-NEXT: v_mov_b32_e32 v1, 2 3355; GFX9-NEXT: s_mov_b32 s32, 0 3356; GFX9-NEXT: s_getpc_b64 s[4:5] 3357; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4 3358; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12 3359; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 3360; GFX9-NEXT: s_endpgm 3361; 3362; GFX11-LABEL: test_call_external_void_func_v2i32_imm: 3363; GFX11: ; %bb.0: 3364; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 3365; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 3366; GFX11-NEXT: s_mov_b32 s32, 0 3367; GFX11-NEXT: s_getpc_b64 s[2:3] 3368; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2i32@rel32@lo+4 3369; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2i32@rel32@hi+12 3370; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3371; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 3372; GFX11-NEXT: s_endpgm 3373; 3374; HSA-LABEL: test_call_external_void_func_v2i32_imm: 3375; HSA: ; %bb.0: 3376; HSA-NEXT: s_add_i32 s6, s6, s9 3377; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 3378; HSA-NEXT: s_add_u32 s0, s0, s9 3379; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 3380; HSA-NEXT: s_addc_u32 s1, s1, 0 3381; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 3382; HSA-NEXT: v_mov_b32_e32 v0, 1 3383; HSA-NEXT: v_mov_b32_e32 v1, 2 3384; HSA-NEXT: s_mov_b32 s32, 0 3385; HSA-NEXT: s_getpc_b64 s[8:9] 3386; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v2i32@rel32@lo+4 3387; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v2i32@rel32@hi+12 3388; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 3389; HSA-NEXT: s_endpgm 3390 call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>) 3391 ret void 3392} 3393 3394define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { 3395; VI-LABEL: test_call_external_void_func_v3i32_imm: 3396; VI: ; %bb.0: 3397; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3398; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3399; VI-NEXT: s_mov_b32 s38, -1 3400; VI-NEXT: s_mov_b32 s39, 0xe80000 3401; VI-NEXT: s_add_u32 s36, s36, s5 3402; VI-NEXT: s_addc_u32 s37, s37, 0 3403; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 3404; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 3405; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 3406; VI-NEXT: v_mov_b32_e32 v0, 3 3407; VI-NEXT: v_mov_b32_e32 v1, 4 3408; VI-NEXT: v_mov_b32_e32 v2, 5 3409; VI-NEXT: s_mov_b32 s32, 0 3410; VI-NEXT: s_getpc_b64 s[4:5] 3411; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 3412; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 3413; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3414; VI-NEXT: s_endpgm 3415; 3416; CI-LABEL: test_call_external_void_func_v3i32_imm: 3417; CI: ; %bb.0: 3418; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3419; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3420; CI-NEXT: s_mov_b32 s38, -1 3421; CI-NEXT: s_mov_b32 s39, 0xe8f000 3422; CI-NEXT: s_add_u32 s36, s36, s5 3423; CI-NEXT: s_addc_u32 s37, s37, 0 3424; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 3425; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 3426; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 3427; CI-NEXT: v_mov_b32_e32 v0, 3 3428; CI-NEXT: v_mov_b32_e32 v1, 4 3429; CI-NEXT: v_mov_b32_e32 v2, 5 3430; CI-NEXT: s_mov_b32 s32, 0 3431; CI-NEXT: s_getpc_b64 s[4:5] 3432; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 3433; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 3434; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3435; CI-NEXT: s_endpgm 3436; 3437; GFX9-LABEL: test_call_external_void_func_v3i32_imm: 3438; GFX9: ; %bb.0: 3439; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3440; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3441; GFX9-NEXT: s_mov_b32 s38, -1 3442; GFX9-NEXT: s_mov_b32 s39, 0xe00000 3443; GFX9-NEXT: s_add_u32 s36, s36, s5 3444; GFX9-NEXT: s_addc_u32 s37, s37, 0 3445; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 3446; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 3447; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 3448; GFX9-NEXT: v_mov_b32_e32 v0, 3 3449; GFX9-NEXT: v_mov_b32_e32 v1, 4 3450; GFX9-NEXT: v_mov_b32_e32 v2, 5 3451; GFX9-NEXT: s_mov_b32 s32, 0 3452; GFX9-NEXT: s_getpc_b64 s[4:5] 3453; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4 3454; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12 3455; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 3456; GFX9-NEXT: s_endpgm 3457; 3458; GFX11-LABEL: test_call_external_void_func_v3i32_imm: 3459; GFX11: ; %bb.0: 3460; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 3461; GFX11-NEXT: v_mov_b32_e32 v2, 5 3462; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 3463; GFX11-NEXT: s_mov_b32 s32, 0 3464; GFX11-NEXT: s_getpc_b64 s[2:3] 3465; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i32@rel32@lo+4 3466; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i32@rel32@hi+12 3467; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3468; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 3469; GFX11-NEXT: s_endpgm 3470; 3471; HSA-LABEL: test_call_external_void_func_v3i32_imm: 3472; HSA: ; %bb.0: 3473; HSA-NEXT: s_add_i32 s8, s8, s11 3474; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 3475; HSA-NEXT: s_add_u32 s0, s0, s11 3476; HSA-NEXT: s_addc_u32 s1, s1, 0 3477; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 3478; HSA-NEXT: v_mov_b32_e32 v0, 3 3479; HSA-NEXT: v_mov_b32_e32 v1, 4 3480; HSA-NEXT: v_mov_b32_e32 v2, 5 3481; HSA-NEXT: s_mov_b32 s32, 0 3482; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 3483; HSA-NEXT: s_getpc_b64 s[8:9] 3484; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i32@rel32@lo+4 3485; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32@rel32@hi+12 3486; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 3487; HSA-NEXT: s_endpgm 3488 call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>) 3489 ret void 3490} 3491 3492define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 { 3493; VI-LABEL: test_call_external_void_func_v3i32_i32: 3494; VI: ; %bb.0: 3495; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3496; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3497; VI-NEXT: s_mov_b32 s38, -1 3498; VI-NEXT: s_mov_b32 s39, 0xe80000 3499; VI-NEXT: s_add_u32 s36, s36, s5 3500; VI-NEXT: s_addc_u32 s37, s37, 0 3501; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 3502; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 3503; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 3504; VI-NEXT: v_mov_b32_e32 v0, 3 3505; VI-NEXT: v_mov_b32_e32 v1, 4 3506; VI-NEXT: v_mov_b32_e32 v2, 5 3507; VI-NEXT: v_mov_b32_e32 v3, 6 3508; VI-NEXT: s_mov_b32 s32, 0 3509; VI-NEXT: s_getpc_b64 s[4:5] 3510; VI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 3511; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 3512; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3513; VI-NEXT: s_endpgm 3514; 3515; CI-LABEL: test_call_external_void_func_v3i32_i32: 3516; CI: ; %bb.0: 3517; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3518; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3519; CI-NEXT: s_mov_b32 s38, -1 3520; CI-NEXT: s_mov_b32 s39, 0xe8f000 3521; CI-NEXT: s_add_u32 s36, s36, s5 3522; CI-NEXT: s_addc_u32 s37, s37, 0 3523; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 3524; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 3525; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 3526; CI-NEXT: v_mov_b32_e32 v0, 3 3527; CI-NEXT: v_mov_b32_e32 v1, 4 3528; CI-NEXT: v_mov_b32_e32 v2, 5 3529; CI-NEXT: v_mov_b32_e32 v3, 6 3530; CI-NEXT: s_mov_b32 s32, 0 3531; CI-NEXT: s_getpc_b64 s[4:5] 3532; CI-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 3533; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 3534; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3535; CI-NEXT: s_endpgm 3536; 3537; GFX9-LABEL: test_call_external_void_func_v3i32_i32: 3538; GFX9: ; %bb.0: 3539; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3540; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3541; GFX9-NEXT: s_mov_b32 s38, -1 3542; GFX9-NEXT: s_mov_b32 s39, 0xe00000 3543; GFX9-NEXT: s_add_u32 s36, s36, s5 3544; GFX9-NEXT: s_addc_u32 s37, s37, 0 3545; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 3546; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 3547; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 3548; GFX9-NEXT: v_mov_b32_e32 v0, 3 3549; GFX9-NEXT: v_mov_b32_e32 v1, 4 3550; GFX9-NEXT: v_mov_b32_e32 v2, 5 3551; GFX9-NEXT: v_mov_b32_e32 v3, 6 3552; GFX9-NEXT: s_mov_b32 s32, 0 3553; GFX9-NEXT: s_getpc_b64 s[4:5] 3554; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4 3555; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12 3556; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 3557; GFX9-NEXT: s_endpgm 3558; 3559; GFX11-LABEL: test_call_external_void_func_v3i32_i32: 3560; GFX11: ; %bb.0: 3561; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4 3562; GFX11-NEXT: v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6 3563; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 3564; GFX11-NEXT: s_mov_b32 s32, 0 3565; GFX11-NEXT: s_getpc_b64 s[2:3] 3566; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v3i32_i32@rel32@lo+4 3567; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v3i32_i32@rel32@hi+12 3568; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3569; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 3570; GFX11-NEXT: s_endpgm 3571; 3572; HSA-LABEL: test_call_external_void_func_v3i32_i32: 3573; HSA: ; %bb.0: 3574; HSA-NEXT: s_add_i32 s8, s8, s11 3575; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 3576; HSA-NEXT: s_add_u32 s0, s0, s11 3577; HSA-NEXT: s_addc_u32 s1, s1, 0 3578; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 3579; HSA-NEXT: v_mov_b32_e32 v0, 3 3580; HSA-NEXT: v_mov_b32_e32 v1, 4 3581; HSA-NEXT: v_mov_b32_e32 v2, 5 3582; HSA-NEXT: v_mov_b32_e32 v3, 6 3583; HSA-NEXT: s_mov_b32 s32, 0 3584; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 3585; HSA-NEXT: s_getpc_b64 s[8:9] 3586; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v3i32_i32@rel32@lo+4 3587; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v3i32_i32@rel32@hi+12 3588; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 3589; HSA-NEXT: s_endpgm 3590 call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6) 3591 ret void 3592} 3593 3594define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { 3595; VI-LABEL: test_call_external_void_func_v4i32: 3596; VI: ; %bb.0: 3597; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3598; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3599; VI-NEXT: s_mov_b32 s38, -1 3600; VI-NEXT: s_mov_b32 s39, 0xe80000 3601; VI-NEXT: s_add_u32 s36, s36, s3 3602; VI-NEXT: s_mov_b32 s3, 0xf000 3603; VI-NEXT: s_mov_b32 s2, -1 3604; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 3605; VI-NEXT: s_addc_u32 s37, s37, 0 3606; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 3607; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 3608; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 3609; VI-NEXT: s_mov_b32 s32, 0 3610; VI-NEXT: s_getpc_b64 s[4:5] 3611; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 3612; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 3613; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3614; VI-NEXT: s_endpgm 3615; 3616; CI-LABEL: test_call_external_void_func_v4i32: 3617; CI: ; %bb.0: 3618; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3619; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3620; CI-NEXT: s_mov_b32 s38, -1 3621; CI-NEXT: s_mov_b32 s39, 0xe8f000 3622; CI-NEXT: s_add_u32 s36, s36, s3 3623; CI-NEXT: s_mov_b32 s3, 0xf000 3624; CI-NEXT: s_mov_b32 s2, -1 3625; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 3626; CI-NEXT: s_addc_u32 s37, s37, 0 3627; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 3628; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 3629; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 3630; CI-NEXT: s_mov_b32 s32, 0 3631; CI-NEXT: s_getpc_b64 s[4:5] 3632; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 3633; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 3634; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3635; CI-NEXT: s_endpgm 3636; 3637; GFX9-LABEL: test_call_external_void_func_v4i32: 3638; GFX9: ; %bb.0: 3639; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3640; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3641; GFX9-NEXT: s_mov_b32 s38, -1 3642; GFX9-NEXT: s_mov_b32 s39, 0xe00000 3643; GFX9-NEXT: s_add_u32 s36, s36, s3 3644; GFX9-NEXT: s_mov_b32 s3, 0xf000 3645; GFX9-NEXT: s_mov_b32 s2, -1 3646; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 3647; GFX9-NEXT: s_addc_u32 s37, s37, 0 3648; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 3649; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 3650; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 3651; GFX9-NEXT: s_mov_b32 s32, 0 3652; GFX9-NEXT: s_getpc_b64 s[4:5] 3653; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 3654; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 3655; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 3656; GFX9-NEXT: s_endpgm 3657; 3658; GFX11-LABEL: test_call_external_void_func_v4i32: 3659; GFX11: ; %bb.0: 3660; GFX11-NEXT: s_mov_b32 s3, 0x31016000 3661; GFX11-NEXT: s_mov_b32 s2, -1 3662; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 3663; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 3664; GFX11-NEXT: s_mov_b32 s32, 0 3665; GFX11-NEXT: s_getpc_b64 s[2:3] 3666; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i32@rel32@lo+4 3667; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i32@rel32@hi+12 3668; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3669; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 3670; GFX11-NEXT: s_endpgm 3671; 3672; HSA-LABEL: test_call_external_void_func_v4i32: 3673; HSA: ; %bb.0: 3674; HSA-NEXT: s_add_i32 s6, s6, s9 3675; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 3676; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 3677; HSA-NEXT: s_mov_b32 s7, 0x1100f000 3678; HSA-NEXT: s_mov_b32 s6, -1 3679; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 3680; HSA-NEXT: s_add_u32 s0, s0, s9 3681; HSA-NEXT: s_addc_u32 s1, s1, 0 3682; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 3683; HSA-NEXT: s_mov_b32 s32, 0 3684; HSA-NEXT: s_getpc_b64 s[8:9] 3685; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i32@rel32@lo+4 3686; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32@rel32@hi+12 3687; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 3688; HSA-NEXT: s_endpgm 3689 %val = load <4 x i32>, ptr addrspace(1) undef 3690 call void @external_void_func_v4i32(<4 x i32> %val) 3691 ret void 3692} 3693 3694define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { 3695; VI-LABEL: test_call_external_void_func_v4i32_imm: 3696; VI: ; %bb.0: 3697; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3698; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3699; VI-NEXT: s_mov_b32 s38, -1 3700; VI-NEXT: s_mov_b32 s39, 0xe80000 3701; VI-NEXT: s_add_u32 s36, s36, s3 3702; VI-NEXT: s_addc_u32 s37, s37, 0 3703; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 3704; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 3705; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 3706; VI-NEXT: v_mov_b32_e32 v0, 1 3707; VI-NEXT: v_mov_b32_e32 v1, 2 3708; VI-NEXT: v_mov_b32_e32 v2, 3 3709; VI-NEXT: v_mov_b32_e32 v3, 4 3710; VI-NEXT: s_mov_b32 s32, 0 3711; VI-NEXT: s_getpc_b64 s[4:5] 3712; VI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 3713; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 3714; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3715; VI-NEXT: s_endpgm 3716; 3717; CI-LABEL: test_call_external_void_func_v4i32_imm: 3718; CI: ; %bb.0: 3719; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3720; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3721; CI-NEXT: s_mov_b32 s38, -1 3722; CI-NEXT: s_mov_b32 s39, 0xe8f000 3723; CI-NEXT: s_add_u32 s36, s36, s3 3724; CI-NEXT: s_addc_u32 s37, s37, 0 3725; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 3726; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 3727; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 3728; CI-NEXT: v_mov_b32_e32 v0, 1 3729; CI-NEXT: v_mov_b32_e32 v1, 2 3730; CI-NEXT: v_mov_b32_e32 v2, 3 3731; CI-NEXT: v_mov_b32_e32 v3, 4 3732; CI-NEXT: s_mov_b32 s32, 0 3733; CI-NEXT: s_getpc_b64 s[4:5] 3734; CI-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 3735; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 3736; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3737; CI-NEXT: s_endpgm 3738; 3739; GFX9-LABEL: test_call_external_void_func_v4i32_imm: 3740; GFX9: ; %bb.0: 3741; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3742; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3743; GFX9-NEXT: s_mov_b32 s38, -1 3744; GFX9-NEXT: s_mov_b32 s39, 0xe00000 3745; GFX9-NEXT: s_add_u32 s36, s36, s3 3746; GFX9-NEXT: s_addc_u32 s37, s37, 0 3747; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 3748; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 3749; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 3750; GFX9-NEXT: v_mov_b32_e32 v0, 1 3751; GFX9-NEXT: v_mov_b32_e32 v1, 2 3752; GFX9-NEXT: v_mov_b32_e32 v2, 3 3753; GFX9-NEXT: v_mov_b32_e32 v3, 4 3754; GFX9-NEXT: s_mov_b32 s32, 0 3755; GFX9-NEXT: s_getpc_b64 s[4:5] 3756; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4 3757; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12 3758; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 3759; GFX9-NEXT: s_endpgm 3760; 3761; GFX11-LABEL: test_call_external_void_func_v4i32_imm: 3762; GFX11: ; %bb.0: 3763; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 3764; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 3765; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 3766; GFX11-NEXT: s_mov_b32 s32, 0 3767; GFX11-NEXT: s_getpc_b64 s[2:3] 3768; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v4i32@rel32@lo+4 3769; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v4i32@rel32@hi+12 3770; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3771; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 3772; GFX11-NEXT: s_endpgm 3773; 3774; HSA-LABEL: test_call_external_void_func_v4i32_imm: 3775; HSA: ; %bb.0: 3776; HSA-NEXT: s_add_i32 s6, s6, s9 3777; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 3778; HSA-NEXT: s_add_u32 s0, s0, s9 3779; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 3780; HSA-NEXT: s_addc_u32 s1, s1, 0 3781; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 3782; HSA-NEXT: v_mov_b32_e32 v0, 1 3783; HSA-NEXT: v_mov_b32_e32 v1, 2 3784; HSA-NEXT: v_mov_b32_e32 v2, 3 3785; HSA-NEXT: v_mov_b32_e32 v3, 4 3786; HSA-NEXT: s_mov_b32 s32, 0 3787; HSA-NEXT: s_getpc_b64 s[8:9] 3788; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v4i32@rel32@lo+4 3789; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v4i32@rel32@hi+12 3790; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 3791; HSA-NEXT: s_endpgm 3792 call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>) 3793 ret void 3794} 3795 3796define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { 3797; VI-LABEL: test_call_external_void_func_v5i32_imm: 3798; VI: ; %bb.0: 3799; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3800; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3801; VI-NEXT: s_mov_b32 s38, -1 3802; VI-NEXT: s_mov_b32 s39, 0xe80000 3803; VI-NEXT: s_add_u32 s36, s36, s3 3804; VI-NEXT: s_addc_u32 s37, s37, 0 3805; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 3806; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 3807; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 3808; VI-NEXT: v_mov_b32_e32 v0, 1 3809; VI-NEXT: v_mov_b32_e32 v1, 2 3810; VI-NEXT: v_mov_b32_e32 v2, 3 3811; VI-NEXT: v_mov_b32_e32 v3, 4 3812; VI-NEXT: v_mov_b32_e32 v4, 5 3813; VI-NEXT: s_mov_b32 s32, 0 3814; VI-NEXT: s_getpc_b64 s[4:5] 3815; VI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 3816; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 3817; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3818; VI-NEXT: s_endpgm 3819; 3820; CI-LABEL: test_call_external_void_func_v5i32_imm: 3821; CI: ; %bb.0: 3822; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3823; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3824; CI-NEXT: s_mov_b32 s38, -1 3825; CI-NEXT: s_mov_b32 s39, 0xe8f000 3826; CI-NEXT: s_add_u32 s36, s36, s3 3827; CI-NEXT: s_addc_u32 s37, s37, 0 3828; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 3829; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 3830; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 3831; CI-NEXT: v_mov_b32_e32 v0, 1 3832; CI-NEXT: v_mov_b32_e32 v1, 2 3833; CI-NEXT: v_mov_b32_e32 v2, 3 3834; CI-NEXT: v_mov_b32_e32 v3, 4 3835; CI-NEXT: v_mov_b32_e32 v4, 5 3836; CI-NEXT: s_mov_b32 s32, 0 3837; CI-NEXT: s_getpc_b64 s[4:5] 3838; CI-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 3839; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 3840; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3841; CI-NEXT: s_endpgm 3842; 3843; GFX9-LABEL: test_call_external_void_func_v5i32_imm: 3844; GFX9: ; %bb.0: 3845; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3846; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3847; GFX9-NEXT: s_mov_b32 s38, -1 3848; GFX9-NEXT: s_mov_b32 s39, 0xe00000 3849; GFX9-NEXT: s_add_u32 s36, s36, s3 3850; GFX9-NEXT: s_addc_u32 s37, s37, 0 3851; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 3852; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 3853; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 3854; GFX9-NEXT: v_mov_b32_e32 v0, 1 3855; GFX9-NEXT: v_mov_b32_e32 v1, 2 3856; GFX9-NEXT: v_mov_b32_e32 v2, 3 3857; GFX9-NEXT: v_mov_b32_e32 v3, 4 3858; GFX9-NEXT: v_mov_b32_e32 v4, 5 3859; GFX9-NEXT: s_mov_b32 s32, 0 3860; GFX9-NEXT: s_getpc_b64 s[4:5] 3861; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4 3862; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12 3863; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 3864; GFX9-NEXT: s_endpgm 3865; 3866; GFX11-LABEL: test_call_external_void_func_v5i32_imm: 3867; GFX11: ; %bb.0: 3868; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 3869; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 3870; GFX11-NEXT: v_mov_b32_e32 v4, 5 3871; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 3872; GFX11-NEXT: s_mov_b32 s32, 0 3873; GFX11-NEXT: s_getpc_b64 s[2:3] 3874; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v5i32@rel32@lo+4 3875; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v5i32@rel32@hi+12 3876; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3877; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 3878; GFX11-NEXT: s_endpgm 3879; 3880; HSA-LABEL: test_call_external_void_func_v5i32_imm: 3881; HSA: ; %bb.0: 3882; HSA-NEXT: s_add_i32 s6, s6, s9 3883; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 3884; HSA-NEXT: s_add_u32 s0, s0, s9 3885; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 3886; HSA-NEXT: s_addc_u32 s1, s1, 0 3887; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 3888; HSA-NEXT: v_mov_b32_e32 v0, 1 3889; HSA-NEXT: v_mov_b32_e32 v1, 2 3890; HSA-NEXT: v_mov_b32_e32 v2, 3 3891; HSA-NEXT: v_mov_b32_e32 v3, 4 3892; HSA-NEXT: v_mov_b32_e32 v4, 5 3893; HSA-NEXT: s_mov_b32 s32, 0 3894; HSA-NEXT: s_getpc_b64 s[8:9] 3895; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v5i32@rel32@lo+4 3896; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v5i32@rel32@hi+12 3897; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 3898; HSA-NEXT: s_endpgm 3899 call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>) 3900 ret void 3901} 3902 3903define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { 3904; VI-LABEL: test_call_external_void_func_v8i32: 3905; VI: ; %bb.0: 3906; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3907; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 3908; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 3909; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3910; VI-NEXT: s_mov_b32 s38, -1 3911; VI-NEXT: s_mov_b32 s39, 0xe80000 3912; VI-NEXT: s_add_u32 s36, s36, s3 3913; VI-NEXT: s_mov_b32 s3, 0xf000 3914; VI-NEXT: s_mov_b32 s2, -1 3915; VI-NEXT: s_waitcnt lgkmcnt(0) 3916; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 3917; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 3918; VI-NEXT: s_addc_u32 s37, s37, 0 3919; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 3920; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 3921; VI-NEXT: s_mov_b32 s32, 0 3922; VI-NEXT: s_getpc_b64 s[4:5] 3923; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 3924; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 3925; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3926; VI-NEXT: s_endpgm 3927; 3928; CI-LABEL: test_call_external_void_func_v8i32: 3929; CI: ; %bb.0: 3930; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3931; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 3932; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 3933; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3934; CI-NEXT: s_mov_b32 s38, -1 3935; CI-NEXT: s_mov_b32 s39, 0xe8f000 3936; CI-NEXT: s_add_u32 s36, s36, s3 3937; CI-NEXT: s_mov_b32 s3, 0xf000 3938; CI-NEXT: s_mov_b32 s2, -1 3939; CI-NEXT: s_waitcnt lgkmcnt(0) 3940; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 3941; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 3942; CI-NEXT: s_addc_u32 s37, s37, 0 3943; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 3944; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 3945; CI-NEXT: s_mov_b32 s32, 0 3946; CI-NEXT: s_getpc_b64 s[4:5] 3947; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 3948; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 3949; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 3950; CI-NEXT: s_endpgm 3951; 3952; GFX9-LABEL: test_call_external_void_func_v8i32: 3953; GFX9: ; %bb.0: 3954; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 3955; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 3956; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 3957; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 3958; GFX9-NEXT: s_mov_b32 s38, -1 3959; GFX9-NEXT: s_mov_b32 s39, 0xe00000 3960; GFX9-NEXT: s_add_u32 s36, s36, s3 3961; GFX9-NEXT: s_mov_b32 s3, 0xf000 3962; GFX9-NEXT: s_mov_b32 s2, -1 3963; GFX9-NEXT: s_waitcnt lgkmcnt(0) 3964; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 3965; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 3966; GFX9-NEXT: s_addc_u32 s37, s37, 0 3967; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 3968; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 3969; GFX9-NEXT: s_mov_b32 s32, 0 3970; GFX9-NEXT: s_getpc_b64 s[4:5] 3971; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 3972; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 3973; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 3974; GFX9-NEXT: s_endpgm 3975; 3976; GFX11-LABEL: test_call_external_void_func_v8i32: 3977; GFX11: ; %bb.0: 3978; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 3979; GFX11-NEXT: s_mov_b32 s7, 0x31016000 3980; GFX11-NEXT: s_mov_b32 s6, -1 3981; GFX11-NEXT: s_mov_b32 s32, 0 3982; GFX11-NEXT: s_getpc_b64 s[2:3] 3983; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v8i32@rel32@lo+4 3984; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v8i32@rel32@hi+12 3985; GFX11-NEXT: s_waitcnt lgkmcnt(0) 3986; GFX11-NEXT: s_clause 0x1 3987; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 3988; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16 3989; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 3990; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 3991; GFX11-NEXT: s_endpgm 3992; 3993; HSA-LABEL: test_call_external_void_func_v8i32: 3994; HSA: ; %bb.0: 3995; HSA-NEXT: s_add_i32 s6, s6, s9 3996; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 3997; HSA-NEXT: s_add_u32 s0, s0, s9 3998; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 3999; HSA-NEXT: s_mov_b32 s11, 0x1100f000 4000; HSA-NEXT: s_mov_b32 s10, -1 4001; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 4002; HSA-NEXT: s_waitcnt lgkmcnt(0) 4003; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 4004; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 4005; HSA-NEXT: s_addc_u32 s1, s1, 0 4006; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 4007; HSA-NEXT: s_mov_b32 s32, 0 4008; HSA-NEXT: s_getpc_b64 s[8:9] 4009; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v8i32@rel32@lo+4 4010; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v8i32@rel32@hi+12 4011; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 4012; HSA-NEXT: s_endpgm 4013 %ptr = load ptr addrspace(1), ptr addrspace(4) undef 4014 %val = load <8 x i32>, ptr addrspace(1) %ptr 4015 call void @external_void_func_v8i32(<8 x i32> %val) 4016 ret void 4017} 4018 4019define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { 4020; VI-LABEL: test_call_external_void_func_v8i32_imm: 4021; VI: ; %bb.0: 4022; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4023; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4024; VI-NEXT: s_mov_b32 s38, -1 4025; VI-NEXT: s_mov_b32 s39, 0xe80000 4026; VI-NEXT: s_add_u32 s36, s36, s3 4027; VI-NEXT: s_addc_u32 s37, s37, 0 4028; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 4029; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 4030; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 4031; VI-NEXT: v_mov_b32_e32 v0, 1 4032; VI-NEXT: v_mov_b32_e32 v1, 2 4033; VI-NEXT: v_mov_b32_e32 v2, 3 4034; VI-NEXT: v_mov_b32_e32 v3, 4 4035; VI-NEXT: v_mov_b32_e32 v4, 5 4036; VI-NEXT: v_mov_b32_e32 v5, 6 4037; VI-NEXT: v_mov_b32_e32 v6, 7 4038; VI-NEXT: v_mov_b32_e32 v7, 8 4039; VI-NEXT: s_mov_b32 s32, 0 4040; VI-NEXT: s_getpc_b64 s[4:5] 4041; VI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 4042; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 4043; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4044; VI-NEXT: s_endpgm 4045; 4046; CI-LABEL: test_call_external_void_func_v8i32_imm: 4047; CI: ; %bb.0: 4048; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4049; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4050; CI-NEXT: s_mov_b32 s38, -1 4051; CI-NEXT: s_mov_b32 s39, 0xe8f000 4052; CI-NEXT: s_add_u32 s36, s36, s3 4053; CI-NEXT: s_addc_u32 s37, s37, 0 4054; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 4055; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 4056; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 4057; CI-NEXT: v_mov_b32_e32 v0, 1 4058; CI-NEXT: v_mov_b32_e32 v1, 2 4059; CI-NEXT: v_mov_b32_e32 v2, 3 4060; CI-NEXT: v_mov_b32_e32 v3, 4 4061; CI-NEXT: v_mov_b32_e32 v4, 5 4062; CI-NEXT: v_mov_b32_e32 v5, 6 4063; CI-NEXT: v_mov_b32_e32 v6, 7 4064; CI-NEXT: v_mov_b32_e32 v7, 8 4065; CI-NEXT: s_mov_b32 s32, 0 4066; CI-NEXT: s_getpc_b64 s[4:5] 4067; CI-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 4068; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 4069; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4070; CI-NEXT: s_endpgm 4071; 4072; GFX9-LABEL: test_call_external_void_func_v8i32_imm: 4073; GFX9: ; %bb.0: 4074; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4075; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4076; GFX9-NEXT: s_mov_b32 s38, -1 4077; GFX9-NEXT: s_mov_b32 s39, 0xe00000 4078; GFX9-NEXT: s_add_u32 s36, s36, s3 4079; GFX9-NEXT: s_addc_u32 s37, s37, 0 4080; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 4081; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 4082; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 4083; GFX9-NEXT: v_mov_b32_e32 v0, 1 4084; GFX9-NEXT: v_mov_b32_e32 v1, 2 4085; GFX9-NEXT: v_mov_b32_e32 v2, 3 4086; GFX9-NEXT: v_mov_b32_e32 v3, 4 4087; GFX9-NEXT: v_mov_b32_e32 v4, 5 4088; GFX9-NEXT: v_mov_b32_e32 v5, 6 4089; GFX9-NEXT: v_mov_b32_e32 v6, 7 4090; GFX9-NEXT: v_mov_b32_e32 v7, 8 4091; GFX9-NEXT: s_mov_b32 s32, 0 4092; GFX9-NEXT: s_getpc_b64 s[4:5] 4093; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4 4094; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12 4095; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 4096; GFX9-NEXT: s_endpgm 4097; 4098; GFX11-LABEL: test_call_external_void_func_v8i32_imm: 4099; GFX11: ; %bb.0: 4100; GFX11-NEXT: v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2 4101; GFX11-NEXT: v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4 4102; GFX11-NEXT: v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6 4103; GFX11-NEXT: v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8 4104; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 4105; GFX11-NEXT: s_mov_b32 s32, 0 4106; GFX11-NEXT: s_getpc_b64 s[2:3] 4107; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v8i32@rel32@lo+4 4108; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v8i32@rel32@hi+12 4109; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 4110; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 4111; GFX11-NEXT: s_endpgm 4112; 4113; HSA-LABEL: test_call_external_void_func_v8i32_imm: 4114; HSA: ; %bb.0: 4115; HSA-NEXT: s_add_i32 s6, s6, s9 4116; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 4117; HSA-NEXT: s_add_u32 s0, s0, s9 4118; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 4119; HSA-NEXT: s_addc_u32 s1, s1, 0 4120; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 4121; HSA-NEXT: v_mov_b32_e32 v0, 1 4122; HSA-NEXT: v_mov_b32_e32 v1, 2 4123; HSA-NEXT: v_mov_b32_e32 v2, 3 4124; HSA-NEXT: v_mov_b32_e32 v3, 4 4125; HSA-NEXT: v_mov_b32_e32 v4, 5 4126; HSA-NEXT: v_mov_b32_e32 v5, 6 4127; HSA-NEXT: v_mov_b32_e32 v6, 7 4128; HSA-NEXT: v_mov_b32_e32 v7, 8 4129; HSA-NEXT: s_mov_b32 s32, 0 4130; HSA-NEXT: s_getpc_b64 s[8:9] 4131; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v8i32@rel32@lo+4 4132; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v8i32@rel32@hi+12 4133; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 4134; HSA-NEXT: s_endpgm 4135 call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>) 4136 ret void 4137} 4138 4139define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { 4140; VI-LABEL: test_call_external_void_func_v16i32: 4141; VI: ; %bb.0: 4142; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4143; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 4144; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 4145; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4146; VI-NEXT: s_mov_b32 s38, -1 4147; VI-NEXT: s_mov_b32 s39, 0xe80000 4148; VI-NEXT: s_add_u32 s36, s36, s3 4149; VI-NEXT: s_mov_b32 s3, 0xf000 4150; VI-NEXT: s_mov_b32 s2, -1 4151; VI-NEXT: s_waitcnt lgkmcnt(0) 4152; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 4153; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 4154; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 4155; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 4156; VI-NEXT: s_addc_u32 s37, s37, 0 4157; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 4158; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 4159; VI-NEXT: s_mov_b32 s32, 0 4160; VI-NEXT: s_getpc_b64 s[4:5] 4161; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 4162; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 4163; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4164; VI-NEXT: s_endpgm 4165; 4166; CI-LABEL: test_call_external_void_func_v16i32: 4167; CI: ; %bb.0: 4168; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4169; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 4170; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 4171; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4172; CI-NEXT: s_mov_b32 s38, -1 4173; CI-NEXT: s_mov_b32 s39, 0xe8f000 4174; CI-NEXT: s_add_u32 s36, s36, s3 4175; CI-NEXT: s_mov_b32 s3, 0xf000 4176; CI-NEXT: s_mov_b32 s2, -1 4177; CI-NEXT: s_waitcnt lgkmcnt(0) 4178; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 4179; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 4180; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 4181; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 4182; CI-NEXT: s_addc_u32 s37, s37, 0 4183; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 4184; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 4185; CI-NEXT: s_mov_b32 s32, 0 4186; CI-NEXT: s_getpc_b64 s[4:5] 4187; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 4188; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 4189; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4190; CI-NEXT: s_endpgm 4191; 4192; GFX9-LABEL: test_call_external_void_func_v16i32: 4193; GFX9: ; %bb.0: 4194; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4195; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 4196; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 4197; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4198; GFX9-NEXT: s_mov_b32 s38, -1 4199; GFX9-NEXT: s_mov_b32 s39, 0xe00000 4200; GFX9-NEXT: s_add_u32 s36, s36, s3 4201; GFX9-NEXT: s_mov_b32 s3, 0xf000 4202; GFX9-NEXT: s_mov_b32 s2, -1 4203; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4204; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 4205; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16 4206; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32 4207; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48 4208; GFX9-NEXT: s_addc_u32 s37, s37, 0 4209; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 4210; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 4211; GFX9-NEXT: s_mov_b32 s32, 0 4212; GFX9-NEXT: s_getpc_b64 s[4:5] 4213; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4 4214; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12 4215; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 4216; GFX9-NEXT: s_endpgm 4217; 4218; GFX11-LABEL: test_call_external_void_func_v16i32: 4219; GFX11: ; %bb.0: 4220; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 4221; GFX11-NEXT: s_mov_b32 s7, 0x31016000 4222; GFX11-NEXT: s_mov_b32 s6, -1 4223; GFX11-NEXT: s_mov_b32 s32, 0 4224; GFX11-NEXT: s_getpc_b64 s[2:3] 4225; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v16i32@rel32@lo+4 4226; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v16i32@rel32@hi+12 4227; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4228; GFX11-NEXT: s_clause 0x3 4229; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 4230; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16 4231; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32 4232; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48 4233; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 4234; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 4235; GFX11-NEXT: s_endpgm 4236; 4237; HSA-LABEL: test_call_external_void_func_v16i32: 4238; HSA: ; %bb.0: 4239; HSA-NEXT: s_add_i32 s6, s6, s9 4240; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 4241; HSA-NEXT: s_add_u32 s0, s0, s9 4242; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4243; HSA-NEXT: s_mov_b32 s11, 0x1100f000 4244; HSA-NEXT: s_mov_b32 s10, -1 4245; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 4246; HSA-NEXT: s_waitcnt lgkmcnt(0) 4247; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 4248; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 4249; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 4250; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 4251; HSA-NEXT: s_addc_u32 s1, s1, 0 4252; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 4253; HSA-NEXT: s_mov_b32 s32, 0 4254; HSA-NEXT: s_getpc_b64 s[8:9] 4255; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v16i32@rel32@lo+4 4256; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v16i32@rel32@hi+12 4257; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 4258; HSA-NEXT: s_endpgm 4259 %ptr = load ptr addrspace(1), ptr addrspace(4) undef 4260 %val = load <16 x i32>, ptr addrspace(1) %ptr 4261 call void @external_void_func_v16i32(<16 x i32> %val) 4262 ret void 4263} 4264 4265define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { 4266; VI-LABEL: test_call_external_void_func_v32i32: 4267; VI: ; %bb.0: 4268; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 4269; VI-NEXT: s_mov_b32 s7, 0xf000 4270; VI-NEXT: s_mov_b32 s6, -1 4271; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4272; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4273; VI-NEXT: s_waitcnt lgkmcnt(0) 4274; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 4275; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 4276; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 4277; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 4278; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 4279; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 4280; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 4281; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 4282; VI-NEXT: s_mov_b32 s38, -1 4283; VI-NEXT: s_mov_b32 s39, 0xe80000 4284; VI-NEXT: s_add_u32 s36, s36, s3 4285; VI-NEXT: s_addc_u32 s37, s37, 0 4286; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 4287; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 4288; VI-NEXT: s_mov_b32 s32, 0 4289; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 4290; VI-NEXT: s_getpc_b64 s[8:9] 4291; VI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 4292; VI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 4293; VI-NEXT: s_waitcnt vmcnt(7) 4294; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32 4295; VI-NEXT: s_swappc_b64 s[30:31], s[8:9] 4296; VI-NEXT: s_endpgm 4297; 4298; CI-LABEL: test_call_external_void_func_v32i32: 4299; CI: ; %bb.0: 4300; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 4301; CI-NEXT: s_mov_b32 s7, 0xf000 4302; CI-NEXT: s_mov_b32 s6, -1 4303; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4304; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4305; CI-NEXT: s_waitcnt lgkmcnt(0) 4306; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 4307; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 4308; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 4309; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 4310; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 4311; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 4312; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 4313; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 4314; CI-NEXT: s_mov_b32 s38, -1 4315; CI-NEXT: s_mov_b32 s39, 0xe8f000 4316; CI-NEXT: s_add_u32 s36, s36, s3 4317; CI-NEXT: s_addc_u32 s37, s37, 0 4318; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 4319; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 4320; CI-NEXT: s_mov_b32 s32, 0 4321; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 4322; CI-NEXT: s_getpc_b64 s[8:9] 4323; CI-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 4324; CI-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 4325; CI-NEXT: s_waitcnt vmcnt(7) 4326; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32 4327; CI-NEXT: s_swappc_b64 s[30:31], s[8:9] 4328; CI-NEXT: s_endpgm 4329; 4330; GFX9-LABEL: test_call_external_void_func_v32i32: 4331; GFX9: ; %bb.0: 4332; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 4333; GFX9-NEXT: s_mov_b32 s7, 0xf000 4334; GFX9-NEXT: s_mov_b32 s6, -1 4335; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4336; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4337; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4338; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 4339; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 4340; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 4341; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 4342; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 4343; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 4344; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 4345; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 4346; GFX9-NEXT: s_mov_b32 s38, -1 4347; GFX9-NEXT: s_mov_b32 s39, 0xe00000 4348; GFX9-NEXT: s_add_u32 s36, s36, s3 4349; GFX9-NEXT: s_addc_u32 s37, s37, 0 4350; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 4351; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 4352; GFX9-NEXT: s_mov_b32 s32, 0 4353; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 4354; GFX9-NEXT: s_getpc_b64 s[8:9] 4355; GFX9-NEXT: s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4 4356; GFX9-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12 4357; GFX9-NEXT: s_waitcnt vmcnt(7) 4358; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32 4359; GFX9-NEXT: s_swappc_b64 s[30:31], s[8:9] 4360; GFX9-NEXT: s_endpgm 4361; 4362; GFX11-LABEL: test_call_external_void_func_v32i32: 4363; GFX11: ; %bb.0: 4364; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 4365; GFX11-NEXT: s_mov_b32 s7, 0x31016000 4366; GFX11-NEXT: s_mov_b32 s6, -1 4367; GFX11-NEXT: s_mov_b32 s32, 0 4368; GFX11-NEXT: s_getpc_b64 s[2:3] 4369; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v32i32@rel32@lo+4 4370; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v32i32@rel32@hi+12 4371; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4372; GFX11-NEXT: s_clause 0x7 4373; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112 4374; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 4375; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16 4376; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32 4377; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48 4378; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64 4379; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80 4380; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96 4381; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 4382; GFX11-NEXT: s_waitcnt vmcnt(7) 4383; GFX11-NEXT: scratch_store_b32 off, v31, s32 4384; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 4385; GFX11-NEXT: s_endpgm 4386; 4387; HSA-LABEL: test_call_external_void_func_v32i32: 4388; HSA: ; %bb.0: 4389; HSA-NEXT: s_add_i32 s6, s6, s9 4390; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 4391; HSA-NEXT: s_add_u32 s0, s0, s9 4392; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4393; HSA-NEXT: s_mov_b32 s11, 0x1100f000 4394; HSA-NEXT: s_mov_b32 s10, -1 4395; HSA-NEXT: s_mov_b32 s32, 0 4396; HSA-NEXT: s_waitcnt lgkmcnt(0) 4397; HSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112 4398; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 4399; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 4400; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 4401; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 4402; HSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64 4403; HSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80 4404; HSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96 4405; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 4406; HSA-NEXT: s_addc_u32 s1, s1, 0 4407; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 4408; HSA-NEXT: s_getpc_b64 s[12:13] 4409; HSA-NEXT: s_add_u32 s12, s12, external_void_func_v32i32@rel32@lo+4 4410; HSA-NEXT: s_addc_u32 s13, s13, external_void_func_v32i32@rel32@hi+12 4411; HSA-NEXT: s_waitcnt vmcnt(7) 4412; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 4413; HSA-NEXT: s_swappc_b64 s[30:31], s[12:13] 4414; HSA-NEXT: s_endpgm 4415 %ptr = load ptr addrspace(1), ptr addrspace(4) undef 4416 %val = load <32 x i32>, ptr addrspace(1) %ptr 4417 call void @external_void_func_v32i32(<32 x i32> %val) 4418 ret void 4419} 4420 4421define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { 4422; VI-LABEL: test_call_external_void_func_v32i32_i32: 4423; VI: ; %bb.0: 4424; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4425; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4426; VI-NEXT: s_mov_b32 s38, -1 4427; VI-NEXT: s_mov_b32 s39, 0xe80000 4428; VI-NEXT: s_add_u32 s36, s36, s5 4429; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 4430; VI-NEXT: s_mov_b32 s7, 0xf000 4431; VI-NEXT: s_mov_b32 s6, -1 4432; VI-NEXT: s_addc_u32 s37, s37, 0 4433; VI-NEXT: s_waitcnt lgkmcnt(0) 4434; VI-NEXT: buffer_load_dword v32, off, s[4:7], 0 4435; VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 4436; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 4437; VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 4438; VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 4439; VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 4440; VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 4441; VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 4442; VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 4443; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 4444; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 4445; VI-NEXT: s_mov_b32 s32, 0 4446; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 4447; VI-NEXT: s_getpc_b64 s[4:5] 4448; VI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 4449; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 4450; VI-NEXT: s_waitcnt vmcnt(8) 4451; VI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 4452; VI-NEXT: s_waitcnt vmcnt(8) 4453; VI-NEXT: buffer_store_dword v31, off, s[36:39], s32 4454; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4455; VI-NEXT: s_endpgm 4456; 4457; CI-LABEL: test_call_external_void_func_v32i32_i32: 4458; CI: ; %bb.0: 4459; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4460; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4461; CI-NEXT: s_mov_b32 s38, -1 4462; CI-NEXT: s_mov_b32 s39, 0xe8f000 4463; CI-NEXT: s_add_u32 s36, s36, s5 4464; CI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 4465; CI-NEXT: s_mov_b32 s7, 0xf000 4466; CI-NEXT: s_mov_b32 s6, -1 4467; CI-NEXT: s_addc_u32 s37, s37, 0 4468; CI-NEXT: s_waitcnt lgkmcnt(0) 4469; CI-NEXT: buffer_load_dword v32, off, s[4:7], 0 4470; CI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 4471; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 4472; CI-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 4473; CI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 4474; CI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 4475; CI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 4476; CI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 4477; CI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 4478; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 4479; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 4480; CI-NEXT: s_mov_b32 s32, 0 4481; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 4482; CI-NEXT: s_getpc_b64 s[4:5] 4483; CI-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 4484; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 4485; CI-NEXT: s_waitcnt vmcnt(8) 4486; CI-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 4487; CI-NEXT: s_waitcnt vmcnt(8) 4488; CI-NEXT: buffer_store_dword v31, off, s[36:39], s32 4489; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4490; CI-NEXT: s_endpgm 4491; 4492; GFX9-LABEL: test_call_external_void_func_v32i32_i32: 4493; GFX9: ; %bb.0: 4494; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4495; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4496; GFX9-NEXT: s_mov_b32 s38, -1 4497; GFX9-NEXT: s_mov_b32 s39, 0xe00000 4498; GFX9-NEXT: s_add_u32 s36, s36, s5 4499; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 4500; GFX9-NEXT: s_mov_b32 s7, 0xf000 4501; GFX9-NEXT: s_mov_b32 s6, -1 4502; GFX9-NEXT: s_addc_u32 s37, s37, 0 4503; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4504; GFX9-NEXT: buffer_load_dword v32, off, s[4:7], 0 4505; GFX9-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112 4506; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 4507; GFX9-NEXT: buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16 4508; GFX9-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32 4509; GFX9-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48 4510; GFX9-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64 4511; GFX9-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 4512; GFX9-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96 4513; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 4514; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 4515; GFX9-NEXT: s_mov_b32 s32, 0 4516; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 4517; GFX9-NEXT: s_getpc_b64 s[4:5] 4518; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4 4519; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12 4520; GFX9-NEXT: s_waitcnt vmcnt(8) 4521; GFX9-NEXT: buffer_store_dword v32, off, s[36:39], s32 offset:4 4522; GFX9-NEXT: s_waitcnt vmcnt(8) 4523; GFX9-NEXT: buffer_store_dword v31, off, s[36:39], s32 4524; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 4525; GFX9-NEXT: s_endpgm 4526; 4527; GFX11-LABEL: test_call_external_void_func_v32i32_i32: 4528; GFX11: ; %bb.0: 4529; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 4530; GFX11-NEXT: s_mov_b32 s7, 0x31016000 4531; GFX11-NEXT: s_mov_b32 s6, -1 4532; GFX11-NEXT: s_mov_b32 s32, 0 4533; GFX11-NEXT: s_getpc_b64 s[2:3] 4534; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v32i32_i32@rel32@lo+4 4535; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v32i32_i32@rel32@hi+12 4536; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4537; GFX11-NEXT: s_clause 0x8 4538; GFX11-NEXT: buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112 4539; GFX11-NEXT: buffer_load_b32 v32, off, s[4:7], 0 4540; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 4541; GFX11-NEXT: buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16 4542; GFX11-NEXT: buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32 4543; GFX11-NEXT: buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48 4544; GFX11-NEXT: buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64 4545; GFX11-NEXT: buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80 4546; GFX11-NEXT: buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96 4547; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 4548; GFX11-NEXT: s_add_i32 s4, s32, 4 4549; GFX11-NEXT: s_waitcnt vmcnt(8) 4550; GFX11-NEXT: scratch_store_b32 off, v31, s32 4551; GFX11-NEXT: s_waitcnt vmcnt(7) 4552; GFX11-NEXT: scratch_store_b32 off, v32, s4 4553; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 4554; GFX11-NEXT: s_endpgm 4555; 4556; HSA-LABEL: test_call_external_void_func_v32i32_i32: 4557; HSA: ; %bb.0: 4558; HSA-NEXT: s_add_i32 s8, s8, s11 4559; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 4560; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 4561; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4562; HSA-NEXT: s_add_u32 s0, s0, s11 4563; HSA-NEXT: s_mov_b32 s11, 0x1100f000 4564; HSA-NEXT: s_mov_b32 s10, -1 4565; HSA-NEXT: s_waitcnt lgkmcnt(0) 4566; HSA-NEXT: buffer_load_dword v32, off, s[8:11], 0 4567; HSA-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112 4568; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 4569; HSA-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 4570; HSA-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 4571; HSA-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 4572; HSA-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64 4573; HSA-NEXT: buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80 4574; HSA-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96 4575; HSA-NEXT: s_mov_b32 s32, 0 4576; HSA-NEXT: s_addc_u32 s1, s1, 0 4577; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 4578; HSA-NEXT: s_getpc_b64 s[8:9] 4579; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v32i32_i32@rel32@lo+4 4580; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v32i32_i32@rel32@hi+12 4581; HSA-NEXT: s_waitcnt vmcnt(8) 4582; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 4583; HSA-NEXT: s_waitcnt vmcnt(8) 4584; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 4585; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 4586; HSA-NEXT: s_endpgm 4587 %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef 4588 %val0 = load <32 x i32>, ptr addrspace(1) %ptr0 4589 %val1 = load i32, ptr addrspace(1) undef 4590 call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1) 4591 ret void 4592} 4593 4594define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 { 4595; VI-LABEL: test_call_external_i32_func_i32_imm: 4596; VI: ; %bb.0: 4597; VI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 4598; VI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 4599; VI-NEXT: s_mov_b32 s42, -1 4600; VI-NEXT: s_mov_b32 s43, 0xe80000 4601; VI-NEXT: s_add_u32 s40, s40, s5 4602; VI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 4603; VI-NEXT: s_addc_u32 s41, s41, 0 4604; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 4605; VI-NEXT: s_mov_b64 s[0:1], s[40:41] 4606; VI-NEXT: s_mov_b64 s[2:3], s[42:43] 4607; VI-NEXT: v_mov_b32_e32 v0, 42 4608; VI-NEXT: s_mov_b32 s32, 0 4609; VI-NEXT: s_mov_b32 s39, 0xf000 4610; VI-NEXT: s_mov_b32 s38, -1 4611; VI-NEXT: s_getpc_b64 s[4:5] 4612; VI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 4613; VI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 4614; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4615; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 4616; VI-NEXT: s_waitcnt vmcnt(0) 4617; VI-NEXT: s_endpgm 4618; 4619; CI-LABEL: test_call_external_i32_func_i32_imm: 4620; CI: ; %bb.0: 4621; CI-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 4622; CI-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 4623; CI-NEXT: s_mov_b32 s42, -1 4624; CI-NEXT: s_mov_b32 s43, 0xe8f000 4625; CI-NEXT: s_add_u32 s40, s40, s5 4626; CI-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x9 4627; CI-NEXT: s_addc_u32 s41, s41, 0 4628; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 4629; CI-NEXT: s_mov_b64 s[0:1], s[40:41] 4630; CI-NEXT: s_mov_b64 s[2:3], s[42:43] 4631; CI-NEXT: v_mov_b32_e32 v0, 42 4632; CI-NEXT: s_mov_b32 s32, 0 4633; CI-NEXT: s_mov_b32 s39, 0xf000 4634; CI-NEXT: s_mov_b32 s38, -1 4635; CI-NEXT: s_getpc_b64 s[4:5] 4636; CI-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 4637; CI-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 4638; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4639; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 4640; CI-NEXT: s_waitcnt vmcnt(0) 4641; CI-NEXT: s_endpgm 4642; 4643; GFX9-LABEL: test_call_external_i32_func_i32_imm: 4644; GFX9: ; %bb.0: 4645; GFX9-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 4646; GFX9-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 4647; GFX9-NEXT: s_mov_b32 s42, -1 4648; GFX9-NEXT: s_mov_b32 s43, 0xe00000 4649; GFX9-NEXT: s_add_u32 s40, s40, s5 4650; GFX9-NEXT: s_load_dwordx2 s[36:37], s[2:3], 0x24 4651; GFX9-NEXT: s_addc_u32 s41, s41, 0 4652; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 4653; GFX9-NEXT: s_mov_b64 s[0:1], s[40:41] 4654; GFX9-NEXT: s_mov_b64 s[2:3], s[42:43] 4655; GFX9-NEXT: v_mov_b32_e32 v0, 42 4656; GFX9-NEXT: s_mov_b32 s32, 0 4657; GFX9-NEXT: s_mov_b32 s39, 0xf000 4658; GFX9-NEXT: s_mov_b32 s38, -1 4659; GFX9-NEXT: s_getpc_b64 s[4:5] 4660; GFX9-NEXT: s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4 4661; GFX9-NEXT: s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12 4662; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 4663; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 4664; GFX9-NEXT: s_waitcnt vmcnt(0) 4665; GFX9-NEXT: s_endpgm 4666; 4667; GFX11-LABEL: test_call_external_i32_func_i32_imm: 4668; GFX11: ; %bb.0: 4669; GFX11-NEXT: s_load_b64 s[36:37], s[2:3], 0x24 4670; GFX11-NEXT: v_mov_b32_e32 v0, 42 4671; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 4672; GFX11-NEXT: s_mov_b32 s32, 0 4673; GFX11-NEXT: s_mov_b32 s39, 0x31016000 4674; GFX11-NEXT: s_mov_b32 s38, -1 4675; GFX11-NEXT: s_getpc_b64 s[2:3] 4676; GFX11-NEXT: s_add_u32 s2, s2, external_i32_func_i32@rel32@lo+4 4677; GFX11-NEXT: s_addc_u32 s3, s3, external_i32_func_i32@rel32@hi+12 4678; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 4679; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 4680; GFX11-NEXT: buffer_store_b32 v0, off, s[36:39], 0 dlc 4681; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4682; GFX11-NEXT: s_endpgm 4683; 4684; HSA-LABEL: test_call_external_i32_func_i32_imm: 4685; HSA: ; %bb.0: 4686; HSA-NEXT: s_add_i32 s8, s8, s11 4687; HSA-NEXT: s_load_dwordx2 s[36:37], s[6:7], 0x0 4688; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 4689; HSA-NEXT: s_add_u32 s0, s0, s11 4690; HSA-NEXT: s_addc_u32 s1, s1, 0 4691; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 4692; HSA-NEXT: v_mov_b32_e32 v0, 42 4693; HSA-NEXT: s_mov_b32 s32, 0 4694; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 4695; HSA-NEXT: s_mov_b32 s39, 0x1100f000 4696; HSA-NEXT: s_mov_b32 s38, -1 4697; HSA-NEXT: s_getpc_b64 s[8:9] 4698; HSA-NEXT: s_add_u32 s8, s8, external_i32_func_i32@rel32@lo+4 4699; HSA-NEXT: s_addc_u32 s9, s9, external_i32_func_i32@rel32@hi+12 4700; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 4701; HSA-NEXT: buffer_store_dword v0, off, s[36:39], 0 4702; HSA-NEXT: s_waitcnt vmcnt(0) 4703; HSA-NEXT: s_endpgm 4704 %val = call i32 @external_i32_func_i32(i32 42) 4705 store volatile i32 %val, ptr addrspace(1) %out 4706 ret void 4707} 4708 4709define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { 4710; VI-LABEL: test_call_external_void_func_struct_i8_i32: 4711; VI: ; %bb.0: 4712; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4713; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 4714; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 4715; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4716; VI-NEXT: s_mov_b32 s38, -1 4717; VI-NEXT: s_mov_b32 s39, 0xe80000 4718; VI-NEXT: s_add_u32 s36, s36, s3 4719; VI-NEXT: s_mov_b32 s3, 0xf000 4720; VI-NEXT: s_mov_b32 s2, -1 4721; VI-NEXT: s_waitcnt lgkmcnt(0) 4722; VI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 4723; VI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 4724; VI-NEXT: s_addc_u32 s37, s37, 0 4725; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 4726; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 4727; VI-NEXT: s_mov_b32 s32, 0 4728; VI-NEXT: s_getpc_b64 s[4:5] 4729; VI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 4730; VI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 4731; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4732; VI-NEXT: s_endpgm 4733; 4734; CI-LABEL: test_call_external_void_func_struct_i8_i32: 4735; CI: ; %bb.0: 4736; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4737; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 4738; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 4739; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4740; CI-NEXT: s_mov_b32 s38, -1 4741; CI-NEXT: s_mov_b32 s39, 0xe8f000 4742; CI-NEXT: s_add_u32 s36, s36, s3 4743; CI-NEXT: s_mov_b32 s3, 0xf000 4744; CI-NEXT: s_mov_b32 s2, -1 4745; CI-NEXT: s_waitcnt lgkmcnt(0) 4746; CI-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 4747; CI-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 4748; CI-NEXT: s_addc_u32 s37, s37, 0 4749; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 4750; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 4751; CI-NEXT: s_mov_b32 s32, 0 4752; CI-NEXT: s_getpc_b64 s[4:5] 4753; CI-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 4754; CI-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 4755; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4756; CI-NEXT: s_endpgm 4757; 4758; GFX9-LABEL: test_call_external_void_func_struct_i8_i32: 4759; GFX9: ; %bb.0: 4760; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4761; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 4762; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 4763; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4764; GFX9-NEXT: s_mov_b32 s38, -1 4765; GFX9-NEXT: s_mov_b32 s39, 0xe00000 4766; GFX9-NEXT: s_add_u32 s36, s36, s3 4767; GFX9-NEXT: s_mov_b32 s3, 0xf000 4768; GFX9-NEXT: s_mov_b32 s2, -1 4769; GFX9-NEXT: s_waitcnt lgkmcnt(0) 4770; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 4771; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4 4772; GFX9-NEXT: s_addc_u32 s37, s37, 0 4773; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 4774; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 4775; GFX9-NEXT: s_mov_b32 s32, 0 4776; GFX9-NEXT: s_getpc_b64 s[4:5] 4777; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4 4778; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12 4779; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 4780; GFX9-NEXT: s_endpgm 4781; 4782; GFX11-LABEL: test_call_external_void_func_struct_i8_i32: 4783; GFX11: ; %bb.0: 4784; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 4785; GFX11-NEXT: s_mov_b32 s7, 0x31016000 4786; GFX11-NEXT: s_mov_b32 s6, -1 4787; GFX11-NEXT: s_mov_b32 s32, 0 4788; GFX11-NEXT: s_getpc_b64 s[2:3] 4789; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_struct_i8_i32@rel32@lo+4 4790; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_struct_i8_i32@rel32@hi+12 4791; GFX11-NEXT: s_waitcnt lgkmcnt(0) 4792; GFX11-NEXT: s_clause 0x1 4793; GFX11-NEXT: buffer_load_u8 v0, off, s[4:7], 0 4794; GFX11-NEXT: buffer_load_b32 v1, off, s[4:7], 0 offset:4 4795; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 4796; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 4797; GFX11-NEXT: s_endpgm 4798; 4799; HSA-LABEL: test_call_external_void_func_struct_i8_i32: 4800; HSA: ; %bb.0: 4801; HSA-NEXT: s_add_i32 s6, s6, s9 4802; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 4803; HSA-NEXT: s_add_u32 s0, s0, s9 4804; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 4805; HSA-NEXT: s_mov_b32 s11, 0x1100f000 4806; HSA-NEXT: s_mov_b32 s10, -1 4807; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 4808; HSA-NEXT: s_waitcnt lgkmcnt(0) 4809; HSA-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 4810; HSA-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:4 4811; HSA-NEXT: s_addc_u32 s1, s1, 0 4812; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 4813; HSA-NEXT: s_mov_b32 s32, 0 4814; HSA-NEXT: s_getpc_b64 s[8:9] 4815; HSA-NEXT: s_add_u32 s8, s8, external_void_func_struct_i8_i32@rel32@lo+4 4816; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_struct_i8_i32@rel32@hi+12 4817; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 4818; HSA-NEXT: s_endpgm 4819 %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef 4820 %val = load { i8, i32 }, ptr addrspace(1) %ptr0 4821 call void @external_void_func_struct_i8_i32({ i8, i32 } %val) 4822 ret void 4823} 4824 4825define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 { 4826; VI-LABEL: test_call_external_void_func_byval_struct_i8_i32: 4827; VI: ; %bb.0: 4828; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4829; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4830; VI-NEXT: s_mov_b32 s38, -1 4831; VI-NEXT: s_mov_b32 s39, 0xe80000 4832; VI-NEXT: s_add_u32 s36, s36, s3 4833; VI-NEXT: s_addc_u32 s37, s37, 0 4834; VI-NEXT: v_mov_b32_e32 v0, 3 4835; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0 4836; VI-NEXT: v_mov_b32_e32 v0, 8 4837; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 4838; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 4839; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 4840; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 4841; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 4842; VI-NEXT: s_movk_i32 s32, 0x400 4843; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 4844; VI-NEXT: s_getpc_b64 s[4:5] 4845; VI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 4846; VI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 4847; VI-NEXT: s_waitcnt vmcnt(1) 4848; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 4849; VI-NEXT: s_waitcnt vmcnt(1) 4850; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32 4851; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4852; VI-NEXT: s_endpgm 4853; 4854; CI-LABEL: test_call_external_void_func_byval_struct_i8_i32: 4855; CI: ; %bb.0: 4856; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4857; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4858; CI-NEXT: s_mov_b32 s38, -1 4859; CI-NEXT: s_mov_b32 s39, 0xe8f000 4860; CI-NEXT: s_add_u32 s36, s36, s3 4861; CI-NEXT: s_addc_u32 s37, s37, 0 4862; CI-NEXT: v_mov_b32_e32 v0, 3 4863; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0 4864; CI-NEXT: v_mov_b32_e32 v0, 8 4865; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 4866; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 4867; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 4868; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 4869; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 4870; CI-NEXT: s_movk_i32 s32, 0x400 4871; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 4872; CI-NEXT: s_getpc_b64 s[4:5] 4873; CI-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 4874; CI-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 4875; CI-NEXT: s_waitcnt vmcnt(1) 4876; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 4877; CI-NEXT: s_waitcnt vmcnt(1) 4878; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32 4879; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4880; CI-NEXT: s_endpgm 4881; 4882; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32: 4883; GFX9: ; %bb.0: 4884; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4885; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4886; GFX9-NEXT: s_mov_b32 s38, -1 4887; GFX9-NEXT: s_mov_b32 s39, 0xe00000 4888; GFX9-NEXT: s_add_u32 s36, s36, s3 4889; GFX9-NEXT: s_addc_u32 s37, s37, 0 4890; GFX9-NEXT: v_mov_b32_e32 v0, 3 4891; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0 4892; GFX9-NEXT: v_mov_b32_e32 v0, 8 4893; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 4894; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 4895; GFX9-NEXT: s_nop 0 4896; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 4897; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 4898; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 4899; GFX9-NEXT: s_movk_i32 s32, 0x400 4900; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 4901; GFX9-NEXT: s_getpc_b64 s[4:5] 4902; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4 4903; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12 4904; GFX9-NEXT: s_waitcnt vmcnt(1) 4905; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 4906; GFX9-NEXT: s_waitcnt vmcnt(1) 4907; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32 4908; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 4909; GFX9-NEXT: s_endpgm 4910; 4911; GFX11-LABEL: test_call_external_void_func_byval_struct_i8_i32: 4912; GFX11: ; %bb.0: 4913; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 4914; GFX11-NEXT: s_mov_b32 s32, 16 4915; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 4916; GFX11-NEXT: s_getpc_b64 s[2:3] 4917; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_byval_struct_i8_i32@rel32@lo+4 4918; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_byval_struct_i8_i32@rel32@hi+12 4919; GFX11-NEXT: s_clause 0x1 4920; GFX11-NEXT: scratch_store_b8 off, v0, off 4921; GFX11-NEXT: scratch_store_b32 off, v1, off offset:4 4922; GFX11-NEXT: scratch_load_b64 v[0:1], off, off 4923; GFX11-NEXT: s_waitcnt vmcnt(0) 4924; GFX11-NEXT: scratch_store_b64 off, v[0:1], s32 4925; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 4926; GFX11-NEXT: s_endpgm 4927; 4928; HSA-LABEL: test_call_external_void_func_byval_struct_i8_i32: 4929; HSA: ; %bb.0: 4930; HSA-NEXT: s_add_i32 s6, s6, s9 4931; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 4932; HSA-NEXT: s_add_u32 s0, s0, s9 4933; HSA-NEXT: s_addc_u32 s1, s1, 0 4934; HSA-NEXT: v_mov_b32_e32 v0, 3 4935; HSA-NEXT: buffer_store_byte v0, off, s[0:3], 0 4936; HSA-NEXT: v_mov_b32_e32 v0, 8 4937; HSA-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 4938; HSA-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 4939; HSA-NEXT: buffer_load_dword v1, off, s[0:3], 0 4940; HSA-NEXT: s_movk_i32 s32, 0x400 4941; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 4942; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 4943; HSA-NEXT: s_getpc_b64 s[8:9] 4944; HSA-NEXT: s_add_u32 s8, s8, external_void_func_byval_struct_i8_i32@rel32@lo+4 4945; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_byval_struct_i8_i32@rel32@hi+12 4946; HSA-NEXT: s_waitcnt vmcnt(1) 4947; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 4948; HSA-NEXT: s_waitcnt vmcnt(1) 4949; HSA-NEXT: buffer_store_dword v1, off, s[0:3], s32 4950; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 4951; HSA-NEXT: s_endpgm 4952 %val = alloca { i8, i32 }, align 8, addrspace(5) 4953 %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0 4954 %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1 4955 store i8 3, ptr addrspace(5) %gep0 4956 store i32 8, ptr addrspace(5) %gep1 4957 call void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %val) 4958 ret void 4959} 4960 4961define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 { 4962; VI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: 4963; VI: ; %bb.0: 4964; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 4965; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 4966; VI-NEXT: s_mov_b32 s38, -1 4967; VI-NEXT: s_mov_b32 s39, 0xe80000 4968; VI-NEXT: s_add_u32 s36, s36, s5 4969; VI-NEXT: s_addc_u32 s37, s37, 0 4970; VI-NEXT: v_mov_b32_e32 v0, 3 4971; VI-NEXT: buffer_store_byte v0, off, s[36:39], 0 4972; VI-NEXT: v_mov_b32_e32 v0, 8 4973; VI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 4974; VI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 4975; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 4976; VI-NEXT: s_movk_i32 s32, 0x800 4977; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 4978; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 4979; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 4980; VI-NEXT: s_getpc_b64 s[4:5] 4981; VI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 4982; VI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 4983; VI-NEXT: s_waitcnt vmcnt(1) 4984; VI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 4985; VI-NEXT: s_waitcnt vmcnt(1) 4986; VI-NEXT: buffer_store_dword v1, off, s[36:39], s32 4987; VI-NEXT: v_mov_b32_e32 v0, 8 4988; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 4989; VI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 4990; VI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 4991; VI-NEXT: s_mov_b32 s3, 0xf000 4992; VI-NEXT: s_mov_b32 s2, -1 4993; VI-NEXT: s_waitcnt vmcnt(1) 4994; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0 4995; VI-NEXT: s_waitcnt vmcnt(0) 4996; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0 4997; VI-NEXT: s_waitcnt vmcnt(0) 4998; VI-NEXT: s_endpgm 4999; 5000; CI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: 5001; CI: ; %bb.0: 5002; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 5003; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 5004; CI-NEXT: s_mov_b32 s38, -1 5005; CI-NEXT: s_mov_b32 s39, 0xe8f000 5006; CI-NEXT: s_add_u32 s36, s36, s5 5007; CI-NEXT: s_addc_u32 s37, s37, 0 5008; CI-NEXT: v_mov_b32_e32 v0, 3 5009; CI-NEXT: buffer_store_byte v0, off, s[36:39], 0 5010; CI-NEXT: v_mov_b32_e32 v0, 8 5011; CI-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 5012; CI-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 5013; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 5014; CI-NEXT: s_movk_i32 s32, 0x800 5015; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 5016; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 5017; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 5018; CI-NEXT: s_getpc_b64 s[4:5] 5019; CI-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 5020; CI-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 5021; CI-NEXT: s_waitcnt vmcnt(1) 5022; CI-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 5023; CI-NEXT: s_waitcnt vmcnt(1) 5024; CI-NEXT: buffer_store_dword v1, off, s[36:39], s32 5025; CI-NEXT: v_mov_b32_e32 v0, 8 5026; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 5027; CI-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 5028; CI-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 5029; CI-NEXT: s_mov_b32 s3, 0xf000 5030; CI-NEXT: s_mov_b32 s2, -1 5031; CI-NEXT: s_waitcnt vmcnt(1) 5032; CI-NEXT: buffer_store_byte v0, off, s[0:3], 0 5033; CI-NEXT: s_waitcnt vmcnt(0) 5034; CI-NEXT: buffer_store_dword v1, off, s[0:3], 0 5035; CI-NEXT: s_waitcnt vmcnt(0) 5036; CI-NEXT: s_endpgm 5037; 5038; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: 5039; GFX9: ; %bb.0: 5040; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 5041; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 5042; GFX9-NEXT: s_mov_b32 s38, -1 5043; GFX9-NEXT: s_mov_b32 s39, 0xe00000 5044; GFX9-NEXT: s_add_u32 s36, s36, s5 5045; GFX9-NEXT: s_addc_u32 s37, s37, 0 5046; GFX9-NEXT: v_mov_b32_e32 v0, 3 5047; GFX9-NEXT: buffer_store_byte v0, off, s[36:39], 0 5048; GFX9-NEXT: v_mov_b32_e32 v0, 8 5049; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], 0 offset:4 5050; GFX9-NEXT: buffer_load_dword v0, off, s[36:39], 0 offset:4 5051; GFX9-NEXT: s_nop 0 5052; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 5053; GFX9-NEXT: s_movk_i32 s32, 0x800 5054; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 5055; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 5056; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 5057; GFX9-NEXT: s_getpc_b64 s[4:5] 5058; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 5059; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 5060; GFX9-NEXT: s_waitcnt vmcnt(1) 5061; GFX9-NEXT: buffer_store_dword v0, off, s[36:39], s32 offset:4 5062; GFX9-NEXT: s_waitcnt vmcnt(1) 5063; GFX9-NEXT: buffer_store_dword v1, off, s[36:39], s32 5064; GFX9-NEXT: v_mov_b32_e32 v0, 8 5065; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 5066; GFX9-NEXT: buffer_load_ubyte v0, off, s[36:39], 0 offset:8 5067; GFX9-NEXT: buffer_load_dword v1, off, s[36:39], 0 offset:12 5068; GFX9-NEXT: s_mov_b32 s3, 0xf000 5069; GFX9-NEXT: s_mov_b32 s2, -1 5070; GFX9-NEXT: s_waitcnt vmcnt(1) 5071; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0 5072; GFX9-NEXT: s_waitcnt vmcnt(0) 5073; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], 0 5074; GFX9-NEXT: s_waitcnt vmcnt(0) 5075; GFX9-NEXT: s_endpgm 5076; 5077; GFX11-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: 5078; GFX11: ; %bb.0: 5079; GFX11-NEXT: v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8 5080; GFX11-NEXT: s_mov_b32 s32, 32 5081; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 5082; GFX11-NEXT: s_getpc_b64 s[2:3] 5083; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 5084; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 5085; GFX11-NEXT: s_clause 0x1 5086; GFX11-NEXT: scratch_store_b8 off, v0, off 5087; GFX11-NEXT: scratch_store_b32 off, v1, off offset:4 5088; GFX11-NEXT: scratch_load_b64 v[0:1], off, off 5089; GFX11-NEXT: s_waitcnt vmcnt(0) 5090; GFX11-NEXT: scratch_store_b64 off, v[0:1], s32 5091; GFX11-NEXT: v_mov_b32_e32 v0, 8 5092; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 5093; GFX11-NEXT: s_clause 0x1 5094; GFX11-NEXT: scratch_load_u8 v0, off, off offset:8 5095; GFX11-NEXT: scratch_load_b32 v1, off, off offset:12 5096; GFX11-NEXT: s_mov_b32 s3, 0x31016000 5097; GFX11-NEXT: s_mov_b32 s2, -1 5098; GFX11-NEXT: s_waitcnt vmcnt(1) 5099; GFX11-NEXT: buffer_store_b8 v0, off, s[0:3], 0 dlc 5100; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5101; GFX11-NEXT: s_waitcnt vmcnt(0) 5102; GFX11-NEXT: buffer_store_b32 v1, off, s[0:3], 0 dlc 5103; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5104; GFX11-NEXT: s_nop 0 5105; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 5106; GFX11-NEXT: s_endpgm 5107; 5108; HSA-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: 5109; HSA: ; %bb.0: 5110; HSA-NEXT: s_add_i32 s8, s8, s11 5111; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 5112; HSA-NEXT: s_add_u32 s0, s0, s11 5113; HSA-NEXT: s_addc_u32 s1, s1, 0 5114; HSA-NEXT: v_mov_b32_e32 v0, 3 5115; HSA-NEXT: buffer_store_byte v0, off, s[0:3], 0 5116; HSA-NEXT: v_mov_b32_e32 v0, 8 5117; HSA-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 5118; HSA-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 5119; HSA-NEXT: buffer_load_dword v1, off, s[0:3], 0 5120; HSA-NEXT: s_movk_i32 s32, 0x800 5121; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 5122; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 5123; HSA-NEXT: s_getpc_b64 s[8:9] 5124; HSA-NEXT: s_add_u32 s8, s8, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 5125; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 5126; HSA-NEXT: s_waitcnt vmcnt(1) 5127; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 5128; HSA-NEXT: s_waitcnt vmcnt(1) 5129; HSA-NEXT: buffer_store_dword v1, off, s[0:3], s32 5130; HSA-NEXT: v_mov_b32_e32 v0, 8 5131; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 5132; HSA-NEXT: buffer_load_ubyte v0, off, s[0:3], 0 offset:8 5133; HSA-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:12 5134; HSA-NEXT: s_mov_b32 s7, 0x1100f000 5135; HSA-NEXT: s_mov_b32 s6, -1 5136; HSA-NEXT: s_waitcnt vmcnt(1) 5137; HSA-NEXT: buffer_store_byte v0, off, s[4:7], 0 5138; HSA-NEXT: s_waitcnt vmcnt(0) 5139; HSA-NEXT: buffer_store_dword v1, off, s[4:7], 0 5140; HSA-NEXT: s_waitcnt vmcnt(0) 5141; HSA-NEXT: s_endpgm 5142 %in.val = alloca { i8, i32 }, align 8, addrspace(5) 5143 %out.val = alloca { i8, i32 }, align 8, addrspace(5) 5144 %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0 5145 %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1 5146 store i8 3, ptr addrspace(5) %in.gep0 5147 store i32 8, ptr addrspace(5) %in.gep1 5148 call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) byval({ i8, i32 }) %in.val) 5149 %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0 5150 %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1 5151 %out.val0 = load i8, ptr addrspace(5) %out.gep0 5152 %out.val1 = load i32, ptr addrspace(5) %out.gep1 5153 5154 store volatile i8 %out.val0, ptr addrspace(1) undef 5155 store volatile i32 %out.val1, ptr addrspace(1) undef 5156 ret void 5157} 5158 5159define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { 5160; VI-LABEL: test_call_external_void_func_v16i8: 5161; VI: ; %bb.0: 5162; VI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 5163; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 5164; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 5165; VI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 5166; VI-NEXT: s_mov_b32 s38, -1 5167; VI-NEXT: s_mov_b32 s39, 0xe80000 5168; VI-NEXT: s_add_u32 s36, s36, s3 5169; VI-NEXT: s_mov_b32 s3, 0xf000 5170; VI-NEXT: s_mov_b32 s2, -1 5171; VI-NEXT: s_waitcnt lgkmcnt(0) 5172; VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 5173; VI-NEXT: s_addc_u32 s37, s37, 0 5174; VI-NEXT: s_mov_b64 s[0:1], s[36:37] 5175; VI-NEXT: s_mov_b64 s[2:3], s[38:39] 5176; VI-NEXT: s_mov_b32 s32, 0 5177; VI-NEXT: s_getpc_b64 s[4:5] 5178; VI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 5179; VI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 5180; VI-NEXT: s_waitcnt vmcnt(0) 5181; VI-NEXT: v_lshrrev_b32_e32 v16, 8, v0 5182; VI-NEXT: v_lshrrev_b32_e32 v17, 16, v0 5183; VI-NEXT: v_lshrrev_b32_e32 v18, 24, v0 5184; VI-NEXT: v_lshrrev_b32_e32 v5, 8, v1 5185; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v1 5186; VI-NEXT: v_lshrrev_b32_e32 v7, 24, v1 5187; VI-NEXT: v_lshrrev_b32_e32 v9, 8, v2 5188; VI-NEXT: v_lshrrev_b32_e32 v10, 16, v2 5189; VI-NEXT: v_lshrrev_b32_e32 v11, 24, v2 5190; VI-NEXT: v_lshrrev_b32_e32 v13, 8, v3 5191; VI-NEXT: v_lshrrev_b32_e32 v14, 16, v3 5192; VI-NEXT: v_lshrrev_b32_e32 v15, 24, v3 5193; VI-NEXT: v_mov_b32_e32 v4, v1 5194; VI-NEXT: v_mov_b32_e32 v8, v2 5195; VI-NEXT: v_mov_b32_e32 v12, v3 5196; VI-NEXT: v_mov_b32_e32 v1, v16 5197; VI-NEXT: v_mov_b32_e32 v2, v17 5198; VI-NEXT: v_mov_b32_e32 v3, v18 5199; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 5200; VI-NEXT: s_endpgm 5201; 5202; CI-LABEL: test_call_external_void_func_v16i8: 5203; CI: ; %bb.0: 5204; CI-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 5205; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 5206; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 5207; CI-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 5208; CI-NEXT: s_mov_b32 s38, -1 5209; CI-NEXT: s_mov_b32 s39, 0xe8f000 5210; CI-NEXT: s_add_u32 s36, s36, s3 5211; CI-NEXT: s_mov_b32 s3, 0xf000 5212; CI-NEXT: s_mov_b32 s2, -1 5213; CI-NEXT: s_waitcnt lgkmcnt(0) 5214; CI-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 5215; CI-NEXT: s_addc_u32 s37, s37, 0 5216; CI-NEXT: s_mov_b64 s[0:1], s[36:37] 5217; CI-NEXT: s_mov_b64 s[2:3], s[38:39] 5218; CI-NEXT: s_mov_b32 s32, 0 5219; CI-NEXT: s_getpc_b64 s[4:5] 5220; CI-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 5221; CI-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 5222; CI-NEXT: s_waitcnt vmcnt(0) 5223; CI-NEXT: v_lshrrev_b32_e32 v16, 8, v0 5224; CI-NEXT: v_lshrrev_b32_e32 v17, 16, v0 5225; CI-NEXT: v_lshrrev_b32_e32 v18, 24, v0 5226; CI-NEXT: v_lshrrev_b32_e32 v5, 8, v1 5227; CI-NEXT: v_lshrrev_b32_e32 v6, 16, v1 5228; CI-NEXT: v_lshrrev_b32_e32 v7, 24, v1 5229; CI-NEXT: v_lshrrev_b32_e32 v9, 8, v2 5230; CI-NEXT: v_lshrrev_b32_e32 v10, 16, v2 5231; CI-NEXT: v_lshrrev_b32_e32 v11, 24, v2 5232; CI-NEXT: v_lshrrev_b32_e32 v13, 8, v3 5233; CI-NEXT: v_lshrrev_b32_e32 v14, 16, v3 5234; CI-NEXT: v_lshrrev_b32_e32 v15, 24, v3 5235; CI-NEXT: v_mov_b32_e32 v4, v1 5236; CI-NEXT: v_mov_b32_e32 v8, v2 5237; CI-NEXT: v_mov_b32_e32 v12, v3 5238; CI-NEXT: v_mov_b32_e32 v1, v16 5239; CI-NEXT: v_mov_b32_e32 v2, v17 5240; CI-NEXT: v_mov_b32_e32 v3, v18 5241; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 5242; CI-NEXT: s_endpgm 5243; 5244; GFX9-LABEL: test_call_external_void_func_v16i8: 5245; GFX9: ; %bb.0: 5246; GFX9-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 5247; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 5248; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 5249; GFX9-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 5250; GFX9-NEXT: s_mov_b32 s38, -1 5251; GFX9-NEXT: s_mov_b32 s39, 0xe00000 5252; GFX9-NEXT: s_add_u32 s36, s36, s3 5253; GFX9-NEXT: s_mov_b32 s3, 0xf000 5254; GFX9-NEXT: s_mov_b32 s2, -1 5255; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5256; GFX9-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 5257; GFX9-NEXT: s_addc_u32 s37, s37, 0 5258; GFX9-NEXT: s_mov_b64 s[0:1], s[36:37] 5259; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39] 5260; GFX9-NEXT: s_mov_b32 s32, 0 5261; GFX9-NEXT: s_getpc_b64 s[4:5] 5262; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4 5263; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12 5264; GFX9-NEXT: s_waitcnt vmcnt(0) 5265; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0 5266; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0 5267; GFX9-NEXT: v_lshrrev_b32_e32 v18, 24, v0 5268; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v1 5269; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v1 5270; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v1 5271; GFX9-NEXT: v_lshrrev_b32_e32 v9, 8, v2 5272; GFX9-NEXT: v_lshrrev_b32_e32 v10, 16, v2 5273; GFX9-NEXT: v_lshrrev_b32_e32 v11, 24, v2 5274; GFX9-NEXT: v_lshrrev_b32_e32 v13, 8, v3 5275; GFX9-NEXT: v_lshrrev_b32_e32 v14, 16, v3 5276; GFX9-NEXT: v_lshrrev_b32_e32 v15, 24, v3 5277; GFX9-NEXT: v_mov_b32_e32 v4, v1 5278; GFX9-NEXT: v_mov_b32_e32 v8, v2 5279; GFX9-NEXT: v_mov_b32_e32 v12, v3 5280; GFX9-NEXT: v_mov_b32_e32 v1, v16 5281; GFX9-NEXT: v_mov_b32_e32 v2, v17 5282; GFX9-NEXT: v_mov_b32_e32 v3, v18 5283; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 5284; GFX9-NEXT: s_endpgm 5285; 5286; GFX11-LABEL: test_call_external_void_func_v16i8: 5287; GFX11: ; %bb.0: 5288; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 5289; GFX11-NEXT: s_mov_b32 s7, 0x31016000 5290; GFX11-NEXT: s_mov_b32 s6, -1 5291; GFX11-NEXT: s_mov_b32 s32, 0 5292; GFX11-NEXT: s_getpc_b64 s[2:3] 5293; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v16i8@rel32@lo+4 5294; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v16i8@rel32@hi+12 5295; GFX11-NEXT: s_waitcnt lgkmcnt(0) 5296; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[4:7], 0 5297; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 5298; GFX11-NEXT: s_waitcnt vmcnt(0) 5299; GFX11-NEXT: v_lshrrev_b32_e32 v16, 8, v0 5300; GFX11-NEXT: v_lshrrev_b32_e32 v17, 16, v0 5301; GFX11-NEXT: v_lshrrev_b32_e32 v18, 24, v0 5302; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1 5303; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v1 5304; GFX11-NEXT: v_lshrrev_b32_e32 v7, 24, v1 5305; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2 5306; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2 5307; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2 5308; GFX11-NEXT: v_lshrrev_b32_e32 v13, 8, v3 5309; GFX11-NEXT: v_lshrrev_b32_e32 v14, 16, v3 5310; GFX11-NEXT: v_lshrrev_b32_e32 v15, 24, v3 5311; GFX11-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v16 5312; GFX11-NEXT: v_mov_b32_e32 v8, v2 5313; GFX11-NEXT: v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18 5314; GFX11-NEXT: v_mov_b32_e32 v2, v17 5315; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 5316; GFX11-NEXT: s_endpgm 5317; 5318; HSA-LABEL: test_call_external_void_func_v16i8: 5319; HSA: ; %bb.0: 5320; HSA-NEXT: s_add_i32 s6, s6, s9 5321; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s6, 8 5322; HSA-NEXT: s_add_u32 s0, s0, s9 5323; HSA-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x0 5324; HSA-NEXT: s_mov_b32 s11, 0x1100f000 5325; HSA-NEXT: s_mov_b32 s10, -1 5326; HSA-NEXT: s_mov_b32 flat_scratch_lo, s7 5327; HSA-NEXT: s_addc_u32 s1, s1, 0 5328; HSA-NEXT: s_waitcnt lgkmcnt(0) 5329; HSA-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 5330; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 5331; HSA-NEXT: s_mov_b32 s32, 0 5332; HSA-NEXT: s_getpc_b64 s[8:9] 5333; HSA-NEXT: s_add_u32 s8, s8, external_void_func_v16i8@rel32@lo+4 5334; HSA-NEXT: s_addc_u32 s9, s9, external_void_func_v16i8@rel32@hi+12 5335; HSA-NEXT: s_waitcnt vmcnt(0) 5336; HSA-NEXT: v_lshrrev_b32_e32 v16, 8, v0 5337; HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v0 5338; HSA-NEXT: v_lshrrev_b32_e32 v18, 24, v0 5339; HSA-NEXT: v_lshrrev_b32_e32 v5, 8, v1 5340; HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v1 5341; HSA-NEXT: v_lshrrev_b32_e32 v7, 24, v1 5342; HSA-NEXT: v_lshrrev_b32_e32 v9, 8, v2 5343; HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v2 5344; HSA-NEXT: v_lshrrev_b32_e32 v11, 24, v2 5345; HSA-NEXT: v_lshrrev_b32_e32 v13, 8, v3 5346; HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v3 5347; HSA-NEXT: v_lshrrev_b32_e32 v15, 24, v3 5348; HSA-NEXT: v_mov_b32_e32 v4, v1 5349; HSA-NEXT: v_mov_b32_e32 v8, v2 5350; HSA-NEXT: v_mov_b32_e32 v12, v3 5351; HSA-NEXT: v_mov_b32_e32 v1, v16 5352; HSA-NEXT: v_mov_b32_e32 v2, v17 5353; HSA-NEXT: v_mov_b32_e32 v3, v18 5354; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9] 5355; HSA-NEXT: s_endpgm 5356 %ptr = load ptr addrspace(1), ptr addrspace(4) undef 5357 %val = load <16 x i8>, ptr addrspace(1) %ptr 5358 call void @external_void_func_v16i8(<16 x i8> %val) 5359 ret void 5360} 5361 5362define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 { 5363; VI-LABEL: stack_passed_arg_alignment_v32i32_f64: 5364; VI: ; %bb.0: ; %entry 5365; VI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 5366; VI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 5367; VI-NEXT: s_mov_b32 s54, -1 5368; VI-NEXT: s_mov_b32 s55, 0xe80000 5369; VI-NEXT: s_add_u32 s52, s52, s5 5370; VI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 5371; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4 5372; VI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 5373; VI-NEXT: s_mov_b32 s32, 0 5374; VI-NEXT: s_addc_u32 s53, s53, 0 5375; VI-NEXT: s_waitcnt lgkmcnt(0) 5376; VI-NEXT: v_mov_b32_e32 v0, s23 5377; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 5378; VI-NEXT: v_mov_b32_e32 v0, s4 5379; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 5380; VI-NEXT: v_mov_b32_e32 v0, s5 5381; VI-NEXT: s_mov_b64 s[6:7], s[0:1] 5382; VI-NEXT: s_mov_b64 s[0:1], s[52:53] 5383; VI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 5384; VI-NEXT: s_mov_b64 s[2:3], s[54:55] 5385; VI-NEXT: v_mov_b32_e32 v0, s36 5386; VI-NEXT: v_mov_b32_e32 v1, s37 5387; VI-NEXT: v_mov_b32_e32 v2, s38 5388; VI-NEXT: v_mov_b32_e32 v3, s39 5389; VI-NEXT: v_mov_b32_e32 v4, s40 5390; VI-NEXT: v_mov_b32_e32 v5, s41 5391; VI-NEXT: v_mov_b32_e32 v6, s42 5392; VI-NEXT: v_mov_b32_e32 v7, s43 5393; VI-NEXT: v_mov_b32_e32 v8, s44 5394; VI-NEXT: v_mov_b32_e32 v9, s45 5395; VI-NEXT: v_mov_b32_e32 v10, s46 5396; VI-NEXT: v_mov_b32_e32 v11, s47 5397; VI-NEXT: v_mov_b32_e32 v12, s48 5398; VI-NEXT: v_mov_b32_e32 v13, s49 5399; VI-NEXT: v_mov_b32_e32 v14, s50 5400; VI-NEXT: v_mov_b32_e32 v15, s51 5401; VI-NEXT: v_mov_b32_e32 v16, s8 5402; VI-NEXT: v_mov_b32_e32 v17, s9 5403; VI-NEXT: v_mov_b32_e32 v18, s10 5404; VI-NEXT: v_mov_b32_e32 v19, s11 5405; VI-NEXT: v_mov_b32_e32 v20, s12 5406; VI-NEXT: v_mov_b32_e32 v21, s13 5407; VI-NEXT: v_mov_b32_e32 v22, s14 5408; VI-NEXT: v_mov_b32_e32 v23, s15 5409; VI-NEXT: v_mov_b32_e32 v24, s16 5410; VI-NEXT: v_mov_b32_e32 v25, s17 5411; VI-NEXT: v_mov_b32_e32 v26, s18 5412; VI-NEXT: v_mov_b32_e32 v27, s19 5413; VI-NEXT: v_mov_b32_e32 v28, s20 5414; VI-NEXT: v_mov_b32_e32 v29, s21 5415; VI-NEXT: v_mov_b32_e32 v30, s22 5416; VI-NEXT: s_getpc_b64 s[4:5] 5417; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 5418; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 5419; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 5420; VI-NEXT: s_endpgm 5421; 5422; CI-LABEL: stack_passed_arg_alignment_v32i32_f64: 5423; CI: ; %bb.0: ; %entry 5424; CI-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 5425; CI-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 5426; CI-NEXT: s_mov_b32 s54, -1 5427; CI-NEXT: s_mov_b32 s55, 0xe8f000 5428; CI-NEXT: s_add_u32 s52, s52, s5 5429; CI-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x19 5430; CI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x29 5431; CI-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x9 5432; CI-NEXT: s_mov_b32 s32, 0 5433; CI-NEXT: s_addc_u32 s53, s53, 0 5434; CI-NEXT: s_waitcnt lgkmcnt(0) 5435; CI-NEXT: v_mov_b32_e32 v0, s23 5436; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 5437; CI-NEXT: v_mov_b32_e32 v0, s4 5438; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 5439; CI-NEXT: v_mov_b32_e32 v0, s5 5440; CI-NEXT: s_mov_b64 s[6:7], s[0:1] 5441; CI-NEXT: s_mov_b64 s[0:1], s[52:53] 5442; CI-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 5443; CI-NEXT: s_mov_b64 s[2:3], s[54:55] 5444; CI-NEXT: v_mov_b32_e32 v0, s36 5445; CI-NEXT: v_mov_b32_e32 v1, s37 5446; CI-NEXT: v_mov_b32_e32 v2, s38 5447; CI-NEXT: v_mov_b32_e32 v3, s39 5448; CI-NEXT: v_mov_b32_e32 v4, s40 5449; CI-NEXT: v_mov_b32_e32 v5, s41 5450; CI-NEXT: v_mov_b32_e32 v6, s42 5451; CI-NEXT: v_mov_b32_e32 v7, s43 5452; CI-NEXT: v_mov_b32_e32 v8, s44 5453; CI-NEXT: v_mov_b32_e32 v9, s45 5454; CI-NEXT: v_mov_b32_e32 v10, s46 5455; CI-NEXT: v_mov_b32_e32 v11, s47 5456; CI-NEXT: v_mov_b32_e32 v12, s48 5457; CI-NEXT: v_mov_b32_e32 v13, s49 5458; CI-NEXT: v_mov_b32_e32 v14, s50 5459; CI-NEXT: v_mov_b32_e32 v15, s51 5460; CI-NEXT: v_mov_b32_e32 v16, s8 5461; CI-NEXT: v_mov_b32_e32 v17, s9 5462; CI-NEXT: v_mov_b32_e32 v18, s10 5463; CI-NEXT: v_mov_b32_e32 v19, s11 5464; CI-NEXT: v_mov_b32_e32 v20, s12 5465; CI-NEXT: v_mov_b32_e32 v21, s13 5466; CI-NEXT: v_mov_b32_e32 v22, s14 5467; CI-NEXT: v_mov_b32_e32 v23, s15 5468; CI-NEXT: v_mov_b32_e32 v24, s16 5469; CI-NEXT: v_mov_b32_e32 v25, s17 5470; CI-NEXT: v_mov_b32_e32 v26, s18 5471; CI-NEXT: v_mov_b32_e32 v27, s19 5472; CI-NEXT: v_mov_b32_e32 v28, s20 5473; CI-NEXT: v_mov_b32_e32 v29, s21 5474; CI-NEXT: v_mov_b32_e32 v30, s22 5475; CI-NEXT: s_getpc_b64 s[4:5] 5476; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 5477; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 5478; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 5479; CI-NEXT: s_endpgm 5480; 5481; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64: 5482; GFX9: ; %bb.0: ; %entry 5483; GFX9-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 5484; GFX9-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 5485; GFX9-NEXT: s_mov_b32 s54, -1 5486; GFX9-NEXT: s_mov_b32 s55, 0xe00000 5487; GFX9-NEXT: s_add_u32 s52, s52, s5 5488; GFX9-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x64 5489; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa4 5490; GFX9-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x24 5491; GFX9-NEXT: s_mov_b32 s32, 0 5492; GFX9-NEXT: s_addc_u32 s53, s53, 0 5493; GFX9-NEXT: s_waitcnt lgkmcnt(0) 5494; GFX9-NEXT: v_mov_b32_e32 v0, s23 5495; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 5496; GFX9-NEXT: v_mov_b32_e32 v0, s4 5497; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:4 5498; GFX9-NEXT: v_mov_b32_e32 v0, s5 5499; GFX9-NEXT: s_mov_b64 s[6:7], s[0:1] 5500; GFX9-NEXT: s_mov_b64 s[0:1], s[52:53] 5501; GFX9-NEXT: buffer_store_dword v0, off, s[52:55], s32 offset:8 5502; GFX9-NEXT: s_mov_b64 s[2:3], s[54:55] 5503; GFX9-NEXT: v_mov_b32_e32 v0, s36 5504; GFX9-NEXT: v_mov_b32_e32 v1, s37 5505; GFX9-NEXT: v_mov_b32_e32 v2, s38 5506; GFX9-NEXT: v_mov_b32_e32 v3, s39 5507; GFX9-NEXT: v_mov_b32_e32 v4, s40 5508; GFX9-NEXT: v_mov_b32_e32 v5, s41 5509; GFX9-NEXT: v_mov_b32_e32 v6, s42 5510; GFX9-NEXT: v_mov_b32_e32 v7, s43 5511; GFX9-NEXT: v_mov_b32_e32 v8, s44 5512; GFX9-NEXT: v_mov_b32_e32 v9, s45 5513; GFX9-NEXT: v_mov_b32_e32 v10, s46 5514; GFX9-NEXT: v_mov_b32_e32 v11, s47 5515; GFX9-NEXT: v_mov_b32_e32 v12, s48 5516; GFX9-NEXT: v_mov_b32_e32 v13, s49 5517; GFX9-NEXT: v_mov_b32_e32 v14, s50 5518; GFX9-NEXT: v_mov_b32_e32 v15, s51 5519; GFX9-NEXT: v_mov_b32_e32 v16, s8 5520; GFX9-NEXT: v_mov_b32_e32 v17, s9 5521; GFX9-NEXT: v_mov_b32_e32 v18, s10 5522; GFX9-NEXT: v_mov_b32_e32 v19, s11 5523; GFX9-NEXT: v_mov_b32_e32 v20, s12 5524; GFX9-NEXT: v_mov_b32_e32 v21, s13 5525; GFX9-NEXT: v_mov_b32_e32 v22, s14 5526; GFX9-NEXT: v_mov_b32_e32 v23, s15 5527; GFX9-NEXT: v_mov_b32_e32 v24, s16 5528; GFX9-NEXT: v_mov_b32_e32 v25, s17 5529; GFX9-NEXT: v_mov_b32_e32 v26, s18 5530; GFX9-NEXT: v_mov_b32_e32 v27, s19 5531; GFX9-NEXT: v_mov_b32_e32 v28, s20 5532; GFX9-NEXT: v_mov_b32_e32 v29, s21 5533; GFX9-NEXT: v_mov_b32_e32 v30, s22 5534; GFX9-NEXT: s_getpc_b64 s[4:5] 5535; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 5536; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 5537; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 5538; GFX9-NEXT: s_endpgm 5539; 5540; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64: 5541; GFX11: ; %bb.0: ; %entry 5542; GFX11-NEXT: s_clause 0x2 5543; GFX11-NEXT: s_load_b64 s[20:21], s[2:3], 0xa4 5544; GFX11-NEXT: s_load_b512 s[4:19], s[2:3], 0x64 5545; GFX11-NEXT: s_load_b512 s[36:51], s[2:3], 0x24 5546; GFX11-NEXT: s_mov_b32 s32, 0 5547; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5548; GFX11-NEXT: s_add_i32 s22, s32, 8 5549; GFX11-NEXT: s_waitcnt lgkmcnt(0) 5550; GFX11-NEXT: v_dual_mov_b32 v0, s21 :: v_dual_mov_b32 v1, s20 5551; GFX11-NEXT: v_mov_b32_e32 v2, s19 5552; GFX11-NEXT: s_add_i32 s19, s32, 4 5553; GFX11-NEXT: v_dual_mov_b32 v4, s40 :: v_dual_mov_b32 v7, s43 5554; GFX11-NEXT: scratch_store_b32 off, v0, s22 5555; GFX11-NEXT: scratch_store_b32 off, v1, s19 5556; GFX11-NEXT: scratch_store_b32 off, v2, s32 5557; GFX11-NEXT: v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v3, s39 5558; GFX11-NEXT: v_dual_mov_b32 v1, s37 :: v_dual_mov_b32 v2, s38 5559; GFX11-NEXT: v_dual_mov_b32 v5, s41 :: v_dual_mov_b32 v6, s42 5560; GFX11-NEXT: v_dual_mov_b32 v9, s45 :: v_dual_mov_b32 v8, s44 5561; GFX11-NEXT: v_dual_mov_b32 v11, s47 :: v_dual_mov_b32 v10, s46 5562; GFX11-NEXT: v_dual_mov_b32 v13, s49 :: v_dual_mov_b32 v12, s48 5563; GFX11-NEXT: v_dual_mov_b32 v15, s51 :: v_dual_mov_b32 v14, s50 5564; GFX11-NEXT: v_dual_mov_b32 v17, s5 :: v_dual_mov_b32 v16, s4 5565; GFX11-NEXT: v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s6 5566; GFX11-NEXT: v_dual_mov_b32 v21, s9 :: v_dual_mov_b32 v20, s8 5567; GFX11-NEXT: v_dual_mov_b32 v23, s11 :: v_dual_mov_b32 v22, s10 5568; GFX11-NEXT: v_dual_mov_b32 v25, s13 :: v_dual_mov_b32 v24, s12 5569; GFX11-NEXT: v_dual_mov_b32 v27, s15 :: v_dual_mov_b32 v26, s14 5570; GFX11-NEXT: v_dual_mov_b32 v29, s17 :: v_dual_mov_b32 v28, s16 5571; GFX11-NEXT: v_mov_b32_e32 v30, s18 5572; GFX11-NEXT: s_mov_b64 s[6:7], s[0:1] 5573; GFX11-NEXT: s_getpc_b64 s[2:3] 5574; GFX11-NEXT: s_add_u32 s2, s2, stack_passed_f64_arg@rel32@lo+4 5575; GFX11-NEXT: s_addc_u32 s3, s3, stack_passed_f64_arg@rel32@hi+12 5576; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5577; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] 5578; GFX11-NEXT: s_endpgm 5579; 5580; HSA-LABEL: stack_passed_arg_alignment_v32i32_f64: 5581; HSA: ; %bb.0: ; %entry 5582; HSA-NEXT: s_add_i32 s8, s8, s11 5583; HSA-NEXT: s_lshr_b32 flat_scratch_hi, s8, 8 5584; HSA-NEXT: s_mov_b32 flat_scratch_lo, s9 5585; HSA-NEXT: s_add_u32 s0, s0, s11 5586; HSA-NEXT: s_load_dwordx16 s[8:23], s[6:7], 0x40 5587; HSA-NEXT: s_load_dwordx2 s[24:25], s[6:7], 0x80 5588; HSA-NEXT: s_load_dwordx16 s[36:51], s[6:7], 0x0 5589; HSA-NEXT: s_mov_b32 s32, 0 5590; HSA-NEXT: s_addc_u32 s1, s1, 0 5591; HSA-NEXT: s_waitcnt lgkmcnt(0) 5592; HSA-NEXT: v_mov_b32_e32 v0, s23 5593; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 5594; HSA-NEXT: v_mov_b32_e32 v0, s24 5595; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 5596; HSA-NEXT: v_mov_b32_e32 v0, s25 5597; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 5598; HSA-NEXT: s_mov_b64 s[6:7], s[4:5] 5599; HSA-NEXT: v_mov_b32_e32 v0, s36 5600; HSA-NEXT: v_mov_b32_e32 v1, s37 5601; HSA-NEXT: v_mov_b32_e32 v2, s38 5602; HSA-NEXT: v_mov_b32_e32 v3, s39 5603; HSA-NEXT: v_mov_b32_e32 v4, s40 5604; HSA-NEXT: v_mov_b32_e32 v5, s41 5605; HSA-NEXT: v_mov_b32_e32 v6, s42 5606; HSA-NEXT: v_mov_b32_e32 v7, s43 5607; HSA-NEXT: v_mov_b32_e32 v8, s44 5608; HSA-NEXT: v_mov_b32_e32 v9, s45 5609; HSA-NEXT: v_mov_b32_e32 v10, s46 5610; HSA-NEXT: v_mov_b32_e32 v11, s47 5611; HSA-NEXT: v_mov_b32_e32 v12, s48 5612; HSA-NEXT: v_mov_b32_e32 v13, s49 5613; HSA-NEXT: v_mov_b32_e32 v14, s50 5614; HSA-NEXT: v_mov_b32_e32 v15, s51 5615; HSA-NEXT: v_mov_b32_e32 v16, s8 5616; HSA-NEXT: v_mov_b32_e32 v17, s9 5617; HSA-NEXT: v_mov_b32_e32 v18, s10 5618; HSA-NEXT: v_mov_b32_e32 v19, s11 5619; HSA-NEXT: v_mov_b32_e32 v20, s12 5620; HSA-NEXT: v_mov_b32_e32 v21, s13 5621; HSA-NEXT: v_mov_b32_e32 v22, s14 5622; HSA-NEXT: v_mov_b32_e32 v23, s15 5623; HSA-NEXT: v_mov_b32_e32 v24, s16 5624; HSA-NEXT: v_mov_b32_e32 v25, s17 5625; HSA-NEXT: v_mov_b32_e32 v26, s18 5626; HSA-NEXT: v_mov_b32_e32 v27, s19 5627; HSA-NEXT: v_mov_b32_e32 v28, s20 5628; HSA-NEXT: v_mov_b32_e32 v29, s21 5629; HSA-NEXT: v_mov_b32_e32 v30, s22 5630; HSA-NEXT: s_getpc_b64 s[24:25] 5631; HSA-NEXT: s_add_u32 s24, s24, stack_passed_f64_arg@rel32@lo+4 5632; HSA-NEXT: s_addc_u32 s25, s25, stack_passed_f64_arg@rel32@hi+12 5633; HSA-NEXT: s_swappc_b64 s[30:31], s[24:25] 5634; HSA-NEXT: s_endpgm 5635entry: 5636 call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp) 5637 ret void 5638} 5639 5640define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { 5641; VI-LABEL: tail_call_byval_align16: 5642; VI: ; %bb.0: ; %entry 5643; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5644; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 5645; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 5646; VI-NEXT: s_getpc_b64 s[4:5] 5647; VI-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 5648; VI-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 5649; VI-NEXT: s_waitcnt vmcnt(1) 5650; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20 5651; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 5652; VI-NEXT: s_waitcnt vmcnt(2) 5653; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 5654; VI-NEXT: s_waitcnt vmcnt(1) 5655; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 5656; VI-NEXT: s_setpc_b64 s[4:5] 5657; 5658; CI-LABEL: tail_call_byval_align16: 5659; CI: ; %bb.0: ; %entry 5660; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5661; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 5662; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 5663; CI-NEXT: s_getpc_b64 s[4:5] 5664; CI-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 5665; CI-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 5666; CI-NEXT: s_waitcnt vmcnt(1) 5667; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20 5668; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 5669; CI-NEXT: s_waitcnt vmcnt(2) 5670; CI-NEXT: buffer_store_dword v32, off, s[0:3], s32 5671; CI-NEXT: s_waitcnt vmcnt(1) 5672; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 5673; CI-NEXT: s_setpc_b64 s[4:5] 5674; 5675; GFX9-LABEL: tail_call_byval_align16: 5676; GFX9: ; %bb.0: ; %entry 5677; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5678; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 5679; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 5680; GFX9-NEXT: s_getpc_b64 s[4:5] 5681; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 5682; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 5683; GFX9-NEXT: s_waitcnt vmcnt(1) 5684; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20 5685; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 5686; GFX9-NEXT: s_waitcnt vmcnt(2) 5687; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 5688; GFX9-NEXT: s_waitcnt vmcnt(1) 5689; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 5690; GFX9-NEXT: s_setpc_b64 s[4:5] 5691; 5692; GFX11-LABEL: tail_call_byval_align16: 5693; GFX11: ; %bb.0: ; %entry 5694; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5695; GFX11-NEXT: scratch_load_b32 v31, off, s32 5696; GFX11-NEXT: s_getpc_b64 s[0:1] 5697; GFX11-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 5698; GFX11-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 5699; GFX11-NEXT: s_waitcnt vmcnt(0) 5700; GFX11-NEXT: scratch_store_b32 off, v31, s32 5701; GFX11-NEXT: scratch_load_b64 v[31:32], off, s32 offset:24 5702; GFX11-NEXT: s_waitcnt vmcnt(0) 5703; GFX11-NEXT: scratch_store_b64 off, v[31:32], s32 offset:16 5704; GFX11-NEXT: s_setpc_b64 s[0:1] 5705; 5706; HSA-LABEL: tail_call_byval_align16: 5707; HSA: ; %bb.0: ; %entry 5708; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5709; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:28 5710; HSA-NEXT: buffer_load_dword v32, off, s[0:3], s32 5711; HSA-NEXT: s_getpc_b64 s[4:5] 5712; HSA-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 5713; HSA-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 5714; HSA-NEXT: s_waitcnt vmcnt(1) 5715; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:20 5716; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 offset:24 5717; HSA-NEXT: s_waitcnt vmcnt(2) 5718; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32 5719; HSA-NEXT: s_waitcnt vmcnt(1) 5720; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 offset:16 5721; HSA-NEXT: s_setpc_b64 s[4:5] 5722entry: 5723 %alloca = alloca double, align 8, addrspace(5) 5724 tail call void @byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca) 5725 ret void 5726} 5727 5728define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 { 5729; VI-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64: 5730; VI: ; %bb.0: ; %entry 5731; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5732; VI-NEXT: buffer_load_dword v31, off, s[0:3], s32 5733; VI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 5734; VI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 5735; VI-NEXT: s_getpc_b64 s[4:5] 5736; VI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 5737; VI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 5738; VI-NEXT: s_waitcnt vmcnt(2) 5739; VI-NEXT: buffer_store_dword v31, off, s[0:3], s32 5740; VI-NEXT: s_waitcnt vmcnt(2) 5741; VI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 5742; VI-NEXT: s_waitcnt vmcnt(2) 5743; VI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8 5744; VI-NEXT: s_setpc_b64 s[4:5] 5745; 5746; CI-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64: 5747; CI: ; %bb.0: ; %entry 5748; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5749; CI-NEXT: buffer_load_dword v31, off, s[0:3], s32 5750; CI-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 5751; CI-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 5752; CI-NEXT: s_getpc_b64 s[4:5] 5753; CI-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 5754; CI-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 5755; CI-NEXT: s_waitcnt vmcnt(2) 5756; CI-NEXT: buffer_store_dword v31, off, s[0:3], s32 5757; CI-NEXT: s_waitcnt vmcnt(2) 5758; CI-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 5759; CI-NEXT: s_waitcnt vmcnt(2) 5760; CI-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8 5761; CI-NEXT: s_setpc_b64 s[4:5] 5762; 5763; GFX9-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64: 5764; GFX9: ; %bb.0: ; %entry 5765; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5766; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s32 5767; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 5768; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 5769; GFX9-NEXT: s_getpc_b64 s[4:5] 5770; GFX9-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 5771; GFX9-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 5772; GFX9-NEXT: s_waitcnt vmcnt(2) 5773; GFX9-NEXT: buffer_store_dword v31, off, s[0:3], s32 5774; GFX9-NEXT: s_waitcnt vmcnt(2) 5775; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 5776; GFX9-NEXT: s_waitcnt vmcnt(2) 5777; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8 5778; GFX9-NEXT: s_setpc_b64 s[4:5] 5779; 5780; GFX11-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64: 5781; GFX11: ; %bb.0: ; %entry 5782; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5783; GFX11-NEXT: s_clause 0x1 5784; GFX11-NEXT: scratch_load_b32 v33, off, s32 5785; GFX11-NEXT: scratch_load_b64 v[31:32], off, s32 offset:4 5786; GFX11-NEXT: s_getpc_b64 s[0:1] 5787; GFX11-NEXT: s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4 5788; GFX11-NEXT: s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12 5789; GFX11-NEXT: s_waitcnt vmcnt(1) 5790; GFX11-NEXT: scratch_store_b32 off, v33, s32 5791; GFX11-NEXT: s_waitcnt vmcnt(0) 5792; GFX11-NEXT: scratch_store_b64 off, v[31:32], s32 offset:4 5793; GFX11-NEXT: s_setpc_b64 s[0:1] 5794; 5795; HSA-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64: 5796; HSA: ; %bb.0: ; %entry 5797; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5798; HSA-NEXT: buffer_load_dword v31, off, s[0:3], s32 5799; HSA-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 5800; HSA-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 5801; HSA-NEXT: s_getpc_b64 s[4:5] 5802; HSA-NEXT: s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4 5803; HSA-NEXT: s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12 5804; HSA-NEXT: s_waitcnt vmcnt(2) 5805; HSA-NEXT: buffer_store_dword v31, off, s[0:3], s32 5806; HSA-NEXT: s_waitcnt vmcnt(2) 5807; HSA-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 5808; HSA-NEXT: s_waitcnt vmcnt(2) 5809; HSA-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:8 5810; HSA-NEXT: s_setpc_b64 s[4:5] 5811entry: 5812 tail call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp) 5813 ret void 5814} 5815 5816define void @stack_12xv3i32() #0 { 5817; VI-LABEL: stack_12xv3i32: 5818; VI: ; %bb.0: ; %entry 5819; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5820; VI-NEXT: s_mov_b32 s4, s33 5821; VI-NEXT: s_mov_b32 s33, s32 5822; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 5823; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 5824; VI-NEXT: s_mov_b64 exec, s[8:9] 5825; VI-NEXT: s_addk_i32 s32, 0x400 5826; VI-NEXT: v_mov_b32_e32 v0, 11 5827; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 5828; VI-NEXT: v_mov_b32_e32 v0, 12 5829; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 5830; VI-NEXT: v_mov_b32_e32 v0, 13 5831; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 5832; VI-NEXT: v_mov_b32_e32 v0, 14 5833; VI-NEXT: v_writelane_b32 v40, s4, 2 5834; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 5835; VI-NEXT: v_mov_b32_e32 v0, 15 5836; VI-NEXT: v_writelane_b32 v40, s30, 0 5837; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 5838; VI-NEXT: v_mov_b32_e32 v0, 0 5839; VI-NEXT: v_mov_b32_e32 v1, 0 5840; VI-NEXT: v_mov_b32_e32 v2, 0 5841; VI-NEXT: v_mov_b32_e32 v3, 1 5842; VI-NEXT: v_mov_b32_e32 v4, 1 5843; VI-NEXT: v_mov_b32_e32 v5, 1 5844; VI-NEXT: v_mov_b32_e32 v6, 2 5845; VI-NEXT: v_mov_b32_e32 v7, 2 5846; VI-NEXT: v_mov_b32_e32 v8, 2 5847; VI-NEXT: v_mov_b32_e32 v9, 3 5848; VI-NEXT: v_mov_b32_e32 v10, 3 5849; VI-NEXT: v_mov_b32_e32 v11, 3 5850; VI-NEXT: v_mov_b32_e32 v12, 4 5851; VI-NEXT: v_mov_b32_e32 v13, 4 5852; VI-NEXT: v_mov_b32_e32 v14, 4 5853; VI-NEXT: v_mov_b32_e32 v15, 5 5854; VI-NEXT: v_mov_b32_e32 v16, 5 5855; VI-NEXT: v_mov_b32_e32 v17, 5 5856; VI-NEXT: v_mov_b32_e32 v18, 6 5857; VI-NEXT: v_mov_b32_e32 v19, 6 5858; VI-NEXT: v_mov_b32_e32 v20, 6 5859; VI-NEXT: v_mov_b32_e32 v21, 7 5860; VI-NEXT: v_mov_b32_e32 v22, 7 5861; VI-NEXT: v_mov_b32_e32 v23, 7 5862; VI-NEXT: v_mov_b32_e32 v24, 8 5863; VI-NEXT: v_mov_b32_e32 v25, 8 5864; VI-NEXT: v_mov_b32_e32 v26, 8 5865; VI-NEXT: v_mov_b32_e32 v27, 9 5866; VI-NEXT: v_mov_b32_e32 v28, 9 5867; VI-NEXT: v_mov_b32_e32 v29, 9 5868; VI-NEXT: v_mov_b32_e32 v30, 10 5869; VI-NEXT: v_writelane_b32 v40, s31, 1 5870; VI-NEXT: s_getpc_b64 s[4:5] 5871; VI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 5872; VI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12 5873; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 5874; VI-NEXT: v_readlane_b32 s31, v40, 1 5875; VI-NEXT: v_readlane_b32 s30, v40, 0 5876; VI-NEXT: s_mov_b32 s32, s33 5877; VI-NEXT: v_readlane_b32 s4, v40, 2 5878; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 5879; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 5880; VI-NEXT: s_mov_b64 exec, s[6:7] 5881; VI-NEXT: s_mov_b32 s33, s4 5882; VI-NEXT: s_waitcnt vmcnt(0) 5883; VI-NEXT: s_setpc_b64 s[30:31] 5884; 5885; CI-LABEL: stack_12xv3i32: 5886; CI: ; %bb.0: ; %entry 5887; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5888; CI-NEXT: s_mov_b32 s4, s33 5889; CI-NEXT: s_mov_b32 s33, s32 5890; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 5891; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 5892; CI-NEXT: s_mov_b64 exec, s[8:9] 5893; CI-NEXT: s_addk_i32 s32, 0x400 5894; CI-NEXT: v_mov_b32_e32 v0, 11 5895; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 5896; CI-NEXT: v_mov_b32_e32 v0, 12 5897; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 5898; CI-NEXT: v_mov_b32_e32 v0, 13 5899; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 5900; CI-NEXT: v_mov_b32_e32 v0, 14 5901; CI-NEXT: v_writelane_b32 v40, s4, 2 5902; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 5903; CI-NEXT: v_mov_b32_e32 v0, 15 5904; CI-NEXT: v_writelane_b32 v40, s30, 0 5905; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 5906; CI-NEXT: v_mov_b32_e32 v0, 0 5907; CI-NEXT: v_mov_b32_e32 v1, 0 5908; CI-NEXT: v_mov_b32_e32 v2, 0 5909; CI-NEXT: v_mov_b32_e32 v3, 1 5910; CI-NEXT: v_mov_b32_e32 v4, 1 5911; CI-NEXT: v_mov_b32_e32 v5, 1 5912; CI-NEXT: v_mov_b32_e32 v6, 2 5913; CI-NEXT: v_mov_b32_e32 v7, 2 5914; CI-NEXT: v_mov_b32_e32 v8, 2 5915; CI-NEXT: v_mov_b32_e32 v9, 3 5916; CI-NEXT: v_mov_b32_e32 v10, 3 5917; CI-NEXT: v_mov_b32_e32 v11, 3 5918; CI-NEXT: v_mov_b32_e32 v12, 4 5919; CI-NEXT: v_mov_b32_e32 v13, 4 5920; CI-NEXT: v_mov_b32_e32 v14, 4 5921; CI-NEXT: v_mov_b32_e32 v15, 5 5922; CI-NEXT: v_mov_b32_e32 v16, 5 5923; CI-NEXT: v_mov_b32_e32 v17, 5 5924; CI-NEXT: v_mov_b32_e32 v18, 6 5925; CI-NEXT: v_mov_b32_e32 v19, 6 5926; CI-NEXT: v_mov_b32_e32 v20, 6 5927; CI-NEXT: v_mov_b32_e32 v21, 7 5928; CI-NEXT: v_mov_b32_e32 v22, 7 5929; CI-NEXT: v_mov_b32_e32 v23, 7 5930; CI-NEXT: v_mov_b32_e32 v24, 8 5931; CI-NEXT: v_mov_b32_e32 v25, 8 5932; CI-NEXT: v_mov_b32_e32 v26, 8 5933; CI-NEXT: v_mov_b32_e32 v27, 9 5934; CI-NEXT: v_mov_b32_e32 v28, 9 5935; CI-NEXT: v_mov_b32_e32 v29, 9 5936; CI-NEXT: v_mov_b32_e32 v30, 10 5937; CI-NEXT: v_writelane_b32 v40, s31, 1 5938; CI-NEXT: s_getpc_b64 s[4:5] 5939; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 5940; CI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12 5941; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 5942; CI-NEXT: v_readlane_b32 s31, v40, 1 5943; CI-NEXT: v_readlane_b32 s30, v40, 0 5944; CI-NEXT: s_mov_b32 s32, s33 5945; CI-NEXT: v_readlane_b32 s4, v40, 2 5946; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 5947; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 5948; CI-NEXT: s_mov_b64 exec, s[6:7] 5949; CI-NEXT: s_mov_b32 s33, s4 5950; CI-NEXT: s_waitcnt vmcnt(0) 5951; CI-NEXT: s_setpc_b64 s[30:31] 5952; 5953; GFX9-LABEL: stack_12xv3i32: 5954; GFX9: ; %bb.0: ; %entry 5955; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5956; GFX9-NEXT: s_mov_b32 s4, s33 5957; GFX9-NEXT: s_mov_b32 s33, s32 5958; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 5959; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 5960; GFX9-NEXT: s_mov_b64 exec, s[8:9] 5961; GFX9-NEXT: s_addk_i32 s32, 0x400 5962; GFX9-NEXT: v_mov_b32_e32 v0, 11 5963; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 5964; GFX9-NEXT: v_mov_b32_e32 v0, 12 5965; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 5966; GFX9-NEXT: v_mov_b32_e32 v0, 13 5967; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 5968; GFX9-NEXT: v_mov_b32_e32 v0, 14 5969; GFX9-NEXT: v_writelane_b32 v40, s4, 2 5970; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 5971; GFX9-NEXT: v_mov_b32_e32 v0, 15 5972; GFX9-NEXT: v_writelane_b32 v40, s30, 0 5973; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 5974; GFX9-NEXT: v_mov_b32_e32 v0, 0 5975; GFX9-NEXT: v_mov_b32_e32 v1, 0 5976; GFX9-NEXT: v_mov_b32_e32 v2, 0 5977; GFX9-NEXT: v_mov_b32_e32 v3, 1 5978; GFX9-NEXT: v_mov_b32_e32 v4, 1 5979; GFX9-NEXT: v_mov_b32_e32 v5, 1 5980; GFX9-NEXT: v_mov_b32_e32 v6, 2 5981; GFX9-NEXT: v_mov_b32_e32 v7, 2 5982; GFX9-NEXT: v_mov_b32_e32 v8, 2 5983; GFX9-NEXT: v_mov_b32_e32 v9, 3 5984; GFX9-NEXT: v_mov_b32_e32 v10, 3 5985; GFX9-NEXT: v_mov_b32_e32 v11, 3 5986; GFX9-NEXT: v_mov_b32_e32 v12, 4 5987; GFX9-NEXT: v_mov_b32_e32 v13, 4 5988; GFX9-NEXT: v_mov_b32_e32 v14, 4 5989; GFX9-NEXT: v_mov_b32_e32 v15, 5 5990; GFX9-NEXT: v_mov_b32_e32 v16, 5 5991; GFX9-NEXT: v_mov_b32_e32 v17, 5 5992; GFX9-NEXT: v_mov_b32_e32 v18, 6 5993; GFX9-NEXT: v_mov_b32_e32 v19, 6 5994; GFX9-NEXT: v_mov_b32_e32 v20, 6 5995; GFX9-NEXT: v_mov_b32_e32 v21, 7 5996; GFX9-NEXT: v_mov_b32_e32 v22, 7 5997; GFX9-NEXT: v_mov_b32_e32 v23, 7 5998; GFX9-NEXT: v_mov_b32_e32 v24, 8 5999; GFX9-NEXT: v_mov_b32_e32 v25, 8 6000; GFX9-NEXT: v_mov_b32_e32 v26, 8 6001; GFX9-NEXT: v_mov_b32_e32 v27, 9 6002; GFX9-NEXT: v_mov_b32_e32 v28, 9 6003; GFX9-NEXT: v_mov_b32_e32 v29, 9 6004; GFX9-NEXT: v_mov_b32_e32 v30, 10 6005; GFX9-NEXT: v_writelane_b32 v40, s31, 1 6006; GFX9-NEXT: s_getpc_b64 s[4:5] 6007; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 6008; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12 6009; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 6010; GFX9-NEXT: v_readlane_b32 s31, v40, 1 6011; GFX9-NEXT: v_readlane_b32 s30, v40, 0 6012; GFX9-NEXT: s_mov_b32 s32, s33 6013; GFX9-NEXT: v_readlane_b32 s4, v40, 2 6014; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 6015; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 6016; GFX9-NEXT: s_mov_b64 exec, s[6:7] 6017; GFX9-NEXT: s_mov_b32 s33, s4 6018; GFX9-NEXT: s_waitcnt vmcnt(0) 6019; GFX9-NEXT: s_setpc_b64 s[30:31] 6020; 6021; GFX11-LABEL: stack_12xv3i32: 6022; GFX11: ; %bb.0: ; %entry 6023; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6024; GFX11-NEXT: s_mov_b32 s0, s33 6025; GFX11-NEXT: s_mov_b32 s33, s32 6026; GFX11-NEXT: s_or_saveexec_b32 s1, -1 6027; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 6028; GFX11-NEXT: s_mov_b32 exec_lo, s1 6029; GFX11-NEXT: v_writelane_b32 v40, s0, 2 6030; GFX11-NEXT: v_dual_mov_b32 v0, 11 :: v_dual_mov_b32 v1, 12 6031; GFX11-NEXT: v_dual_mov_b32 v2, 13 :: v_dual_mov_b32 v3, 14 6032; GFX11-NEXT: v_mov_b32_e32 v4, 15 6033; GFX11-NEXT: s_add_i32 s32, s32, 16 6034; GFX11-NEXT: v_writelane_b32 v40, s30, 0 6035; GFX11-NEXT: s_add_i32 s0, s32, 16 6036; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 6037; GFX11-NEXT: scratch_store_b32 off, v4, s0 6038; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0 6039; GFX11-NEXT: v_dual_mov_b32 v3, 1 :: v_dual_mov_b32 v2, 0 6040; GFX11-NEXT: v_dual_mov_b32 v5, 1 :: v_dual_mov_b32 v4, 1 6041; GFX11-NEXT: v_dual_mov_b32 v7, 2 :: v_dual_mov_b32 v6, 2 6042; GFX11-NEXT: v_dual_mov_b32 v9, 3 :: v_dual_mov_b32 v8, 2 6043; GFX11-NEXT: v_dual_mov_b32 v11, 3 :: v_dual_mov_b32 v10, 3 6044; GFX11-NEXT: v_dual_mov_b32 v13, 4 :: v_dual_mov_b32 v12, 4 6045; GFX11-NEXT: v_dual_mov_b32 v15, 5 :: v_dual_mov_b32 v14, 4 6046; GFX11-NEXT: v_dual_mov_b32 v17, 5 :: v_dual_mov_b32 v16, 5 6047; GFX11-NEXT: v_dual_mov_b32 v19, 6 :: v_dual_mov_b32 v18, 6 6048; GFX11-NEXT: v_dual_mov_b32 v21, 7 :: v_dual_mov_b32 v20, 6 6049; GFX11-NEXT: v_dual_mov_b32 v23, 7 :: v_dual_mov_b32 v22, 7 6050; GFX11-NEXT: v_dual_mov_b32 v25, 8 :: v_dual_mov_b32 v24, 8 6051; GFX11-NEXT: v_dual_mov_b32 v27, 9 :: v_dual_mov_b32 v26, 8 6052; GFX11-NEXT: v_dual_mov_b32 v29, 9 :: v_dual_mov_b32 v28, 9 6053; GFX11-NEXT: v_mov_b32_e32 v30, 10 6054; GFX11-NEXT: v_writelane_b32 v40, s31, 1 6055; GFX11-NEXT: s_getpc_b64 s[0:1] 6056; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 6057; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 6058; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6059; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 6060; GFX11-NEXT: v_readlane_b32 s31, v40, 1 6061; GFX11-NEXT: v_readlane_b32 s30, v40, 0 6062; GFX11-NEXT: s_mov_b32 s32, s33 6063; GFX11-NEXT: v_readlane_b32 s0, v40, 2 6064; GFX11-NEXT: s_or_saveexec_b32 s1, -1 6065; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 6066; GFX11-NEXT: s_mov_b32 exec_lo, s1 6067; GFX11-NEXT: s_mov_b32 s33, s0 6068; GFX11-NEXT: s_waitcnt vmcnt(0) 6069; GFX11-NEXT: s_setpc_b64 s[30:31] 6070; 6071; HSA-LABEL: stack_12xv3i32: 6072; HSA: ; %bb.0: ; %entry 6073; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6074; HSA-NEXT: s_mov_b32 s4, s33 6075; HSA-NEXT: s_mov_b32 s33, s32 6076; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 6077; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 6078; HSA-NEXT: s_mov_b64 exec, s[8:9] 6079; HSA-NEXT: s_addk_i32 s32, 0x400 6080; HSA-NEXT: v_mov_b32_e32 v0, 11 6081; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 6082; HSA-NEXT: v_mov_b32_e32 v0, 12 6083; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 6084; HSA-NEXT: v_mov_b32_e32 v0, 13 6085; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 6086; HSA-NEXT: v_mov_b32_e32 v0, 14 6087; HSA-NEXT: v_writelane_b32 v40, s4, 2 6088; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 6089; HSA-NEXT: v_mov_b32_e32 v0, 15 6090; HSA-NEXT: v_writelane_b32 v40, s30, 0 6091; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 6092; HSA-NEXT: v_mov_b32_e32 v0, 0 6093; HSA-NEXT: v_mov_b32_e32 v1, 0 6094; HSA-NEXT: v_mov_b32_e32 v2, 0 6095; HSA-NEXT: v_mov_b32_e32 v3, 1 6096; HSA-NEXT: v_mov_b32_e32 v4, 1 6097; HSA-NEXT: v_mov_b32_e32 v5, 1 6098; HSA-NEXT: v_mov_b32_e32 v6, 2 6099; HSA-NEXT: v_mov_b32_e32 v7, 2 6100; HSA-NEXT: v_mov_b32_e32 v8, 2 6101; HSA-NEXT: v_mov_b32_e32 v9, 3 6102; HSA-NEXT: v_mov_b32_e32 v10, 3 6103; HSA-NEXT: v_mov_b32_e32 v11, 3 6104; HSA-NEXT: v_mov_b32_e32 v12, 4 6105; HSA-NEXT: v_mov_b32_e32 v13, 4 6106; HSA-NEXT: v_mov_b32_e32 v14, 4 6107; HSA-NEXT: v_mov_b32_e32 v15, 5 6108; HSA-NEXT: v_mov_b32_e32 v16, 5 6109; HSA-NEXT: v_mov_b32_e32 v17, 5 6110; HSA-NEXT: v_mov_b32_e32 v18, 6 6111; HSA-NEXT: v_mov_b32_e32 v19, 6 6112; HSA-NEXT: v_mov_b32_e32 v20, 6 6113; HSA-NEXT: v_mov_b32_e32 v21, 7 6114; HSA-NEXT: v_mov_b32_e32 v22, 7 6115; HSA-NEXT: v_mov_b32_e32 v23, 7 6116; HSA-NEXT: v_mov_b32_e32 v24, 8 6117; HSA-NEXT: v_mov_b32_e32 v25, 8 6118; HSA-NEXT: v_mov_b32_e32 v26, 8 6119; HSA-NEXT: v_mov_b32_e32 v27, 9 6120; HSA-NEXT: v_mov_b32_e32 v28, 9 6121; HSA-NEXT: v_mov_b32_e32 v29, 9 6122; HSA-NEXT: v_mov_b32_e32 v30, 10 6123; HSA-NEXT: v_writelane_b32 v40, s31, 1 6124; HSA-NEXT: s_getpc_b64 s[4:5] 6125; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4 6126; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12 6127; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] 6128; HSA-NEXT: v_readlane_b32 s31, v40, 1 6129; HSA-NEXT: v_readlane_b32 s30, v40, 0 6130; HSA-NEXT: s_mov_b32 s32, s33 6131; HSA-NEXT: v_readlane_b32 s4, v40, 2 6132; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 6133; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 6134; HSA-NEXT: s_mov_b64 exec, s[6:7] 6135; HSA-NEXT: s_mov_b32 s33, s4 6136; HSA-NEXT: s_waitcnt vmcnt(0) 6137; HSA-NEXT: s_setpc_b64 s[30:31] 6138entry: 6139 call void @external_void_func_12xv3i32( 6140 <3 x i32><i32 0, i32 0, i32 0>, 6141 <3 x i32><i32 1, i32 1, i32 1>, 6142 <3 x i32><i32 2, i32 2, i32 2>, 6143 <3 x i32><i32 3, i32 3, i32 3>, 6144 <3 x i32><i32 4, i32 4, i32 4>, 6145 <3 x i32><i32 5, i32 5, i32 5>, 6146 <3 x i32><i32 6, i32 6, i32 6>, 6147 <3 x i32><i32 7, i32 7, i32 7>, 6148 <3 x i32><i32 8, i32 8, i32 8>, 6149 <3 x i32><i32 9, i32 9, i32 9>, 6150 <3 x i32><i32 10, i32 11, i32 12>, 6151 <3 x i32><i32 13, i32 14, i32 15>) 6152 ret void 6153} 6154 6155define void @stack_12xv3f32() #0 { 6156; VI-LABEL: stack_12xv3f32: 6157; VI: ; %bb.0: ; %entry 6158; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6159; VI-NEXT: s_mov_b32 s4, s33 6160; VI-NEXT: s_mov_b32 s33, s32 6161; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 6162; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 6163; VI-NEXT: s_mov_b64 exec, s[8:9] 6164; VI-NEXT: s_addk_i32 s32, 0x400 6165; VI-NEXT: v_mov_b32_e32 v0, 0x41300000 6166; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 6167; VI-NEXT: v_mov_b32_e32 v0, 0x41400000 6168; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 6169; VI-NEXT: v_mov_b32_e32 v0, 0x41500000 6170; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 6171; VI-NEXT: v_mov_b32_e32 v0, 0x41600000 6172; VI-NEXT: v_writelane_b32 v40, s4, 2 6173; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 6174; VI-NEXT: v_mov_b32_e32 v0, 0x41700000 6175; VI-NEXT: v_writelane_b32 v40, s30, 0 6176; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 6177; VI-NEXT: v_mov_b32_e32 v0, 0 6178; VI-NEXT: v_mov_b32_e32 v1, 0 6179; VI-NEXT: v_mov_b32_e32 v2, 0 6180; VI-NEXT: v_mov_b32_e32 v3, 1.0 6181; VI-NEXT: v_mov_b32_e32 v4, 1.0 6182; VI-NEXT: v_mov_b32_e32 v5, 1.0 6183; VI-NEXT: v_mov_b32_e32 v6, 2.0 6184; VI-NEXT: v_mov_b32_e32 v7, 2.0 6185; VI-NEXT: v_mov_b32_e32 v8, 2.0 6186; VI-NEXT: v_mov_b32_e32 v9, 0x40400000 6187; VI-NEXT: v_mov_b32_e32 v10, 0x40400000 6188; VI-NEXT: v_mov_b32_e32 v11, 0x40400000 6189; VI-NEXT: v_mov_b32_e32 v12, 4.0 6190; VI-NEXT: v_mov_b32_e32 v13, 4.0 6191; VI-NEXT: v_mov_b32_e32 v14, 4.0 6192; VI-NEXT: v_mov_b32_e32 v15, 0x40a00000 6193; VI-NEXT: v_mov_b32_e32 v16, 0x40a00000 6194; VI-NEXT: v_mov_b32_e32 v17, 0x40a00000 6195; VI-NEXT: v_mov_b32_e32 v18, 0x40c00000 6196; VI-NEXT: v_mov_b32_e32 v19, 0x40c00000 6197; VI-NEXT: v_mov_b32_e32 v20, 0x40c00000 6198; VI-NEXT: v_mov_b32_e32 v21, 0x40e00000 6199; VI-NEXT: v_mov_b32_e32 v22, 0x40e00000 6200; VI-NEXT: v_mov_b32_e32 v23, 0x40e00000 6201; VI-NEXT: v_mov_b32_e32 v24, 0x41000000 6202; VI-NEXT: v_mov_b32_e32 v25, 0x41000000 6203; VI-NEXT: v_mov_b32_e32 v26, 0x41000000 6204; VI-NEXT: v_mov_b32_e32 v27, 0x41100000 6205; VI-NEXT: v_mov_b32_e32 v28, 0x41100000 6206; VI-NEXT: v_mov_b32_e32 v29, 0x41100000 6207; VI-NEXT: v_mov_b32_e32 v30, 0x41200000 6208; VI-NEXT: v_writelane_b32 v40, s31, 1 6209; VI-NEXT: s_getpc_b64 s[4:5] 6210; VI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 6211; VI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12 6212; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 6213; VI-NEXT: v_readlane_b32 s31, v40, 1 6214; VI-NEXT: v_readlane_b32 s30, v40, 0 6215; VI-NEXT: s_mov_b32 s32, s33 6216; VI-NEXT: v_readlane_b32 s4, v40, 2 6217; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 6218; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 6219; VI-NEXT: s_mov_b64 exec, s[6:7] 6220; VI-NEXT: s_mov_b32 s33, s4 6221; VI-NEXT: s_waitcnt vmcnt(0) 6222; VI-NEXT: s_setpc_b64 s[30:31] 6223; 6224; CI-LABEL: stack_12xv3f32: 6225; CI: ; %bb.0: ; %entry 6226; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6227; CI-NEXT: s_mov_b32 s4, s33 6228; CI-NEXT: s_mov_b32 s33, s32 6229; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 6230; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 6231; CI-NEXT: s_mov_b64 exec, s[8:9] 6232; CI-NEXT: s_addk_i32 s32, 0x400 6233; CI-NEXT: v_mov_b32_e32 v0, 0x41300000 6234; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 6235; CI-NEXT: v_mov_b32_e32 v0, 0x41400000 6236; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 6237; CI-NEXT: v_mov_b32_e32 v0, 0x41500000 6238; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 6239; CI-NEXT: v_mov_b32_e32 v0, 0x41600000 6240; CI-NEXT: v_writelane_b32 v40, s4, 2 6241; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 6242; CI-NEXT: v_mov_b32_e32 v0, 0x41700000 6243; CI-NEXT: v_writelane_b32 v40, s30, 0 6244; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 6245; CI-NEXT: v_mov_b32_e32 v0, 0 6246; CI-NEXT: v_mov_b32_e32 v1, 0 6247; CI-NEXT: v_mov_b32_e32 v2, 0 6248; CI-NEXT: v_mov_b32_e32 v3, 1.0 6249; CI-NEXT: v_mov_b32_e32 v4, 1.0 6250; CI-NEXT: v_mov_b32_e32 v5, 1.0 6251; CI-NEXT: v_mov_b32_e32 v6, 2.0 6252; CI-NEXT: v_mov_b32_e32 v7, 2.0 6253; CI-NEXT: v_mov_b32_e32 v8, 2.0 6254; CI-NEXT: v_mov_b32_e32 v9, 0x40400000 6255; CI-NEXT: v_mov_b32_e32 v10, 0x40400000 6256; CI-NEXT: v_mov_b32_e32 v11, 0x40400000 6257; CI-NEXT: v_mov_b32_e32 v12, 4.0 6258; CI-NEXT: v_mov_b32_e32 v13, 4.0 6259; CI-NEXT: v_mov_b32_e32 v14, 4.0 6260; CI-NEXT: v_mov_b32_e32 v15, 0x40a00000 6261; CI-NEXT: v_mov_b32_e32 v16, 0x40a00000 6262; CI-NEXT: v_mov_b32_e32 v17, 0x40a00000 6263; CI-NEXT: v_mov_b32_e32 v18, 0x40c00000 6264; CI-NEXT: v_mov_b32_e32 v19, 0x40c00000 6265; CI-NEXT: v_mov_b32_e32 v20, 0x40c00000 6266; CI-NEXT: v_mov_b32_e32 v21, 0x40e00000 6267; CI-NEXT: v_mov_b32_e32 v22, 0x40e00000 6268; CI-NEXT: v_mov_b32_e32 v23, 0x40e00000 6269; CI-NEXT: v_mov_b32_e32 v24, 0x41000000 6270; CI-NEXT: v_mov_b32_e32 v25, 0x41000000 6271; CI-NEXT: v_mov_b32_e32 v26, 0x41000000 6272; CI-NEXT: v_mov_b32_e32 v27, 0x41100000 6273; CI-NEXT: v_mov_b32_e32 v28, 0x41100000 6274; CI-NEXT: v_mov_b32_e32 v29, 0x41100000 6275; CI-NEXT: v_mov_b32_e32 v30, 0x41200000 6276; CI-NEXT: v_writelane_b32 v40, s31, 1 6277; CI-NEXT: s_getpc_b64 s[4:5] 6278; CI-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 6279; CI-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12 6280; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 6281; CI-NEXT: v_readlane_b32 s31, v40, 1 6282; CI-NEXT: v_readlane_b32 s30, v40, 0 6283; CI-NEXT: s_mov_b32 s32, s33 6284; CI-NEXT: v_readlane_b32 s4, v40, 2 6285; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 6286; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 6287; CI-NEXT: s_mov_b64 exec, s[6:7] 6288; CI-NEXT: s_mov_b32 s33, s4 6289; CI-NEXT: s_waitcnt vmcnt(0) 6290; CI-NEXT: s_setpc_b64 s[30:31] 6291; 6292; GFX9-LABEL: stack_12xv3f32: 6293; GFX9: ; %bb.0: ; %entry 6294; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6295; GFX9-NEXT: s_mov_b32 s4, s33 6296; GFX9-NEXT: s_mov_b32 s33, s32 6297; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 6298; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 6299; GFX9-NEXT: s_mov_b64 exec, s[8:9] 6300; GFX9-NEXT: s_addk_i32 s32, 0x400 6301; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000 6302; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 6303; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000 6304; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 6305; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000 6306; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 6307; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 6308; GFX9-NEXT: v_writelane_b32 v40, s4, 2 6309; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 6310; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 6311; GFX9-NEXT: v_writelane_b32 v40, s30, 0 6312; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 6313; GFX9-NEXT: v_mov_b32_e32 v0, 0 6314; GFX9-NEXT: v_mov_b32_e32 v1, 0 6315; GFX9-NEXT: v_mov_b32_e32 v2, 0 6316; GFX9-NEXT: v_mov_b32_e32 v3, 1.0 6317; GFX9-NEXT: v_mov_b32_e32 v4, 1.0 6318; GFX9-NEXT: v_mov_b32_e32 v5, 1.0 6319; GFX9-NEXT: v_mov_b32_e32 v6, 2.0 6320; GFX9-NEXT: v_mov_b32_e32 v7, 2.0 6321; GFX9-NEXT: v_mov_b32_e32 v8, 2.0 6322; GFX9-NEXT: v_mov_b32_e32 v9, 0x40400000 6323; GFX9-NEXT: v_mov_b32_e32 v10, 0x40400000 6324; GFX9-NEXT: v_mov_b32_e32 v11, 0x40400000 6325; GFX9-NEXT: v_mov_b32_e32 v12, 4.0 6326; GFX9-NEXT: v_mov_b32_e32 v13, 4.0 6327; GFX9-NEXT: v_mov_b32_e32 v14, 4.0 6328; GFX9-NEXT: v_mov_b32_e32 v15, 0x40a00000 6329; GFX9-NEXT: v_mov_b32_e32 v16, 0x40a00000 6330; GFX9-NEXT: v_mov_b32_e32 v17, 0x40a00000 6331; GFX9-NEXT: v_mov_b32_e32 v18, 0x40c00000 6332; GFX9-NEXT: v_mov_b32_e32 v19, 0x40c00000 6333; GFX9-NEXT: v_mov_b32_e32 v20, 0x40c00000 6334; GFX9-NEXT: v_mov_b32_e32 v21, 0x40e00000 6335; GFX9-NEXT: v_mov_b32_e32 v22, 0x40e00000 6336; GFX9-NEXT: v_mov_b32_e32 v23, 0x40e00000 6337; GFX9-NEXT: v_mov_b32_e32 v24, 0x41000000 6338; GFX9-NEXT: v_mov_b32_e32 v25, 0x41000000 6339; GFX9-NEXT: v_mov_b32_e32 v26, 0x41000000 6340; GFX9-NEXT: v_mov_b32_e32 v27, 0x41100000 6341; GFX9-NEXT: v_mov_b32_e32 v28, 0x41100000 6342; GFX9-NEXT: v_mov_b32_e32 v29, 0x41100000 6343; GFX9-NEXT: v_mov_b32_e32 v30, 0x41200000 6344; GFX9-NEXT: v_writelane_b32 v40, s31, 1 6345; GFX9-NEXT: s_getpc_b64 s[4:5] 6346; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 6347; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12 6348; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 6349; GFX9-NEXT: v_readlane_b32 s31, v40, 1 6350; GFX9-NEXT: v_readlane_b32 s30, v40, 0 6351; GFX9-NEXT: s_mov_b32 s32, s33 6352; GFX9-NEXT: v_readlane_b32 s4, v40, 2 6353; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 6354; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 6355; GFX9-NEXT: s_mov_b64 exec, s[6:7] 6356; GFX9-NEXT: s_mov_b32 s33, s4 6357; GFX9-NEXT: s_waitcnt vmcnt(0) 6358; GFX9-NEXT: s_setpc_b64 s[30:31] 6359; 6360; GFX11-LABEL: stack_12xv3f32: 6361; GFX11: ; %bb.0: ; %entry 6362; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6363; GFX11-NEXT: s_mov_b32 s0, s33 6364; GFX11-NEXT: s_mov_b32 s33, s32 6365; GFX11-NEXT: s_or_saveexec_b32 s1, -1 6366; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 6367; GFX11-NEXT: s_mov_b32 exec_lo, s1 6368; GFX11-NEXT: v_writelane_b32 v40, s0, 2 6369; GFX11-NEXT: v_mov_b32_e32 v0, 0x41300000 6370; GFX11-NEXT: v_mov_b32_e32 v1, 0x41400000 6371; GFX11-NEXT: v_mov_b32_e32 v2, 0x41500000 6372; GFX11-NEXT: v_mov_b32_e32 v3, 0x41600000 6373; GFX11-NEXT: v_dual_mov_b32 v4, 0x41700000 :: v_dual_mov_b32 v5, 1.0 6374; GFX11-NEXT: s_add_i32 s32, s32, 16 6375; GFX11-NEXT: v_writelane_b32 v40, s30, 0 6376; GFX11-NEXT: s_add_i32 s0, s32, 16 6377; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 6378; GFX11-NEXT: scratch_store_b32 off, v4, s0 6379; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0 6380; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1.0 6381; GFX11-NEXT: v_dual_mov_b32 v4, 1.0 :: v_dual_mov_b32 v7, 2.0 6382; GFX11-NEXT: v_dual_mov_b32 v6, 2.0 :: v_dual_mov_b32 v9, 0x40400000 6383; GFX11-NEXT: v_dual_mov_b32 v8, 2.0 :: v_dual_mov_b32 v11, 0x40400000 6384; GFX11-NEXT: v_dual_mov_b32 v10, 0x40400000 :: v_dual_mov_b32 v13, 4.0 6385; GFX11-NEXT: v_dual_mov_b32 v12, 4.0 :: v_dual_mov_b32 v15, 0x40a00000 6386; GFX11-NEXT: v_dual_mov_b32 v14, 4.0 :: v_dual_mov_b32 v17, 0x40a00000 6387; GFX11-NEXT: v_mov_b32_e32 v16, 0x40a00000 6388; GFX11-NEXT: v_dual_mov_b32 v18, 0x40c00000 :: v_dual_mov_b32 v19, 0x40c00000 6389; GFX11-NEXT: v_mov_b32_e32 v20, 0x40c00000 6390; GFX11-NEXT: v_dual_mov_b32 v21, 0x40e00000 :: v_dual_mov_b32 v22, 0x40e00000 6391; GFX11-NEXT: v_mov_b32_e32 v23, 0x40e00000 6392; GFX11-NEXT: v_dual_mov_b32 v24, 0x41000000 :: v_dual_mov_b32 v25, 0x41000000 6393; GFX11-NEXT: v_mov_b32_e32 v26, 0x41000000 6394; GFX11-NEXT: v_dual_mov_b32 v27, 0x41100000 :: v_dual_mov_b32 v28, 0x41100000 6395; GFX11-NEXT: v_mov_b32_e32 v29, 0x41100000 6396; GFX11-NEXT: v_mov_b32_e32 v30, 0x41200000 6397; GFX11-NEXT: v_writelane_b32 v40, s31, 1 6398; GFX11-NEXT: s_getpc_b64 s[0:1] 6399; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_12xv3f32@rel32@lo+4 6400; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3f32@rel32@hi+12 6401; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6402; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 6403; GFX11-NEXT: v_readlane_b32 s31, v40, 1 6404; GFX11-NEXT: v_readlane_b32 s30, v40, 0 6405; GFX11-NEXT: s_mov_b32 s32, s33 6406; GFX11-NEXT: v_readlane_b32 s0, v40, 2 6407; GFX11-NEXT: s_or_saveexec_b32 s1, -1 6408; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 6409; GFX11-NEXT: s_mov_b32 exec_lo, s1 6410; GFX11-NEXT: s_mov_b32 s33, s0 6411; GFX11-NEXT: s_waitcnt vmcnt(0) 6412; GFX11-NEXT: s_setpc_b64 s[30:31] 6413; 6414; HSA-LABEL: stack_12xv3f32: 6415; HSA: ; %bb.0: ; %entry 6416; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6417; HSA-NEXT: s_mov_b32 s4, s33 6418; HSA-NEXT: s_mov_b32 s33, s32 6419; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 6420; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 6421; HSA-NEXT: s_mov_b64 exec, s[8:9] 6422; HSA-NEXT: s_addk_i32 s32, 0x400 6423; HSA-NEXT: v_mov_b32_e32 v0, 0x41300000 6424; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 6425; HSA-NEXT: v_mov_b32_e32 v0, 0x41400000 6426; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 6427; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000 6428; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 6429; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000 6430; HSA-NEXT: v_writelane_b32 v40, s4, 2 6431; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 6432; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000 6433; HSA-NEXT: v_writelane_b32 v40, s30, 0 6434; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 6435; HSA-NEXT: v_mov_b32_e32 v0, 0 6436; HSA-NEXT: v_mov_b32_e32 v1, 0 6437; HSA-NEXT: v_mov_b32_e32 v2, 0 6438; HSA-NEXT: v_mov_b32_e32 v3, 1.0 6439; HSA-NEXT: v_mov_b32_e32 v4, 1.0 6440; HSA-NEXT: v_mov_b32_e32 v5, 1.0 6441; HSA-NEXT: v_mov_b32_e32 v6, 2.0 6442; HSA-NEXT: v_mov_b32_e32 v7, 2.0 6443; HSA-NEXT: v_mov_b32_e32 v8, 2.0 6444; HSA-NEXT: v_mov_b32_e32 v9, 0x40400000 6445; HSA-NEXT: v_mov_b32_e32 v10, 0x40400000 6446; HSA-NEXT: v_mov_b32_e32 v11, 0x40400000 6447; HSA-NEXT: v_mov_b32_e32 v12, 4.0 6448; HSA-NEXT: v_mov_b32_e32 v13, 4.0 6449; HSA-NEXT: v_mov_b32_e32 v14, 4.0 6450; HSA-NEXT: v_mov_b32_e32 v15, 0x40a00000 6451; HSA-NEXT: v_mov_b32_e32 v16, 0x40a00000 6452; HSA-NEXT: v_mov_b32_e32 v17, 0x40a00000 6453; HSA-NEXT: v_mov_b32_e32 v18, 0x40c00000 6454; HSA-NEXT: v_mov_b32_e32 v19, 0x40c00000 6455; HSA-NEXT: v_mov_b32_e32 v20, 0x40c00000 6456; HSA-NEXT: v_mov_b32_e32 v21, 0x40e00000 6457; HSA-NEXT: v_mov_b32_e32 v22, 0x40e00000 6458; HSA-NEXT: v_mov_b32_e32 v23, 0x40e00000 6459; HSA-NEXT: v_mov_b32_e32 v24, 0x41000000 6460; HSA-NEXT: v_mov_b32_e32 v25, 0x41000000 6461; HSA-NEXT: v_mov_b32_e32 v26, 0x41000000 6462; HSA-NEXT: v_mov_b32_e32 v27, 0x41100000 6463; HSA-NEXT: v_mov_b32_e32 v28, 0x41100000 6464; HSA-NEXT: v_mov_b32_e32 v29, 0x41100000 6465; HSA-NEXT: v_mov_b32_e32 v30, 0x41200000 6466; HSA-NEXT: v_writelane_b32 v40, s31, 1 6467; HSA-NEXT: s_getpc_b64 s[4:5] 6468; HSA-NEXT: s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4 6469; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12 6470; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] 6471; HSA-NEXT: v_readlane_b32 s31, v40, 1 6472; HSA-NEXT: v_readlane_b32 s30, v40, 0 6473; HSA-NEXT: s_mov_b32 s32, s33 6474; HSA-NEXT: v_readlane_b32 s4, v40, 2 6475; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 6476; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 6477; HSA-NEXT: s_mov_b64 exec, s[6:7] 6478; HSA-NEXT: s_mov_b32 s33, s4 6479; HSA-NEXT: s_waitcnt vmcnt(0) 6480; HSA-NEXT: s_setpc_b64 s[30:31] 6481entry: 6482 call void @external_void_func_12xv3f32( 6483 <3 x float><float 0.0, float 0.0, float 0.0>, 6484 <3 x float><float 1.0, float 1.0, float 1.0>, 6485 <3 x float><float 2.0, float 2.0, float 2.0>, 6486 <3 x float><float 3.0, float 3.0, float 3.0>, 6487 <3 x float><float 4.0, float 4.0, float 4.0>, 6488 <3 x float><float 5.0, float 5.0, float 5.0>, 6489 <3 x float><float 6.0, float 6.0, float 6.0>, 6490 <3 x float><float 7.0, float 7.0, float 7.0>, 6491 <3 x float><float 8.0, float 8.0, float 8.0>, 6492 <3 x float><float 9.0, float 9.0, float 9.0>, 6493 <3 x float><float 10.0, float 11.0, float 12.0>, 6494 <3 x float><float 13.0, float 14.0, float 15.0>) 6495 ret void 6496} 6497 6498define void @stack_8xv5i32() #0 { 6499; VI-LABEL: stack_8xv5i32: 6500; VI: ; %bb.0: ; %entry 6501; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6502; VI-NEXT: s_mov_b32 s4, s33 6503; VI-NEXT: s_mov_b32 s33, s32 6504; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 6505; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 6506; VI-NEXT: s_mov_b64 exec, s[8:9] 6507; VI-NEXT: s_addk_i32 s32, 0x400 6508; VI-NEXT: v_mov_b32_e32 v0, 7 6509; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 6510; VI-NEXT: v_mov_b32_e32 v0, 8 6511; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 6512; VI-NEXT: v_mov_b32_e32 v0, 9 6513; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 6514; VI-NEXT: v_mov_b32_e32 v0, 10 6515; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 6516; VI-NEXT: v_mov_b32_e32 v0, 11 6517; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 6518; VI-NEXT: v_mov_b32_e32 v0, 12 6519; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 6520; VI-NEXT: v_mov_b32_e32 v0, 13 6521; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 6522; VI-NEXT: v_mov_b32_e32 v0, 14 6523; VI-NEXT: v_writelane_b32 v40, s4, 2 6524; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 6525; VI-NEXT: v_mov_b32_e32 v0, 15 6526; VI-NEXT: v_writelane_b32 v40, s30, 0 6527; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 6528; VI-NEXT: v_mov_b32_e32 v0, 0 6529; VI-NEXT: v_mov_b32_e32 v1, 0 6530; VI-NEXT: v_mov_b32_e32 v2, 0 6531; VI-NEXT: v_mov_b32_e32 v3, 0 6532; VI-NEXT: v_mov_b32_e32 v4, 0 6533; VI-NEXT: v_mov_b32_e32 v5, 1 6534; VI-NEXT: v_mov_b32_e32 v6, 1 6535; VI-NEXT: v_mov_b32_e32 v7, 1 6536; VI-NEXT: v_mov_b32_e32 v8, 1 6537; VI-NEXT: v_mov_b32_e32 v9, 1 6538; VI-NEXT: v_mov_b32_e32 v10, 2 6539; VI-NEXT: v_mov_b32_e32 v11, 2 6540; VI-NEXT: v_mov_b32_e32 v12, 2 6541; VI-NEXT: v_mov_b32_e32 v13, 2 6542; VI-NEXT: v_mov_b32_e32 v14, 2 6543; VI-NEXT: v_mov_b32_e32 v15, 3 6544; VI-NEXT: v_mov_b32_e32 v16, 3 6545; VI-NEXT: v_mov_b32_e32 v17, 3 6546; VI-NEXT: v_mov_b32_e32 v18, 3 6547; VI-NEXT: v_mov_b32_e32 v19, 3 6548; VI-NEXT: v_mov_b32_e32 v20, 4 6549; VI-NEXT: v_mov_b32_e32 v21, 4 6550; VI-NEXT: v_mov_b32_e32 v22, 4 6551; VI-NEXT: v_mov_b32_e32 v23, 4 6552; VI-NEXT: v_mov_b32_e32 v24, 4 6553; VI-NEXT: v_mov_b32_e32 v25, 5 6554; VI-NEXT: v_mov_b32_e32 v26, 5 6555; VI-NEXT: v_mov_b32_e32 v27, 5 6556; VI-NEXT: v_mov_b32_e32 v28, 5 6557; VI-NEXT: v_mov_b32_e32 v29, 5 6558; VI-NEXT: v_mov_b32_e32 v30, 6 6559; VI-NEXT: v_writelane_b32 v40, s31, 1 6560; VI-NEXT: s_getpc_b64 s[4:5] 6561; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 6562; VI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12 6563; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 6564; VI-NEXT: v_readlane_b32 s31, v40, 1 6565; VI-NEXT: v_readlane_b32 s30, v40, 0 6566; VI-NEXT: s_mov_b32 s32, s33 6567; VI-NEXT: v_readlane_b32 s4, v40, 2 6568; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 6569; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 6570; VI-NEXT: s_mov_b64 exec, s[6:7] 6571; VI-NEXT: s_mov_b32 s33, s4 6572; VI-NEXT: s_waitcnt vmcnt(0) 6573; VI-NEXT: s_setpc_b64 s[30:31] 6574; 6575; CI-LABEL: stack_8xv5i32: 6576; CI: ; %bb.0: ; %entry 6577; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6578; CI-NEXT: s_mov_b32 s4, s33 6579; CI-NEXT: s_mov_b32 s33, s32 6580; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 6581; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 6582; CI-NEXT: s_mov_b64 exec, s[8:9] 6583; CI-NEXT: s_addk_i32 s32, 0x400 6584; CI-NEXT: v_mov_b32_e32 v0, 7 6585; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 6586; CI-NEXT: v_mov_b32_e32 v0, 8 6587; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 6588; CI-NEXT: v_mov_b32_e32 v0, 9 6589; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 6590; CI-NEXT: v_mov_b32_e32 v0, 10 6591; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 6592; CI-NEXT: v_mov_b32_e32 v0, 11 6593; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 6594; CI-NEXT: v_mov_b32_e32 v0, 12 6595; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 6596; CI-NEXT: v_mov_b32_e32 v0, 13 6597; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 6598; CI-NEXT: v_mov_b32_e32 v0, 14 6599; CI-NEXT: v_writelane_b32 v40, s4, 2 6600; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 6601; CI-NEXT: v_mov_b32_e32 v0, 15 6602; CI-NEXT: v_writelane_b32 v40, s30, 0 6603; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 6604; CI-NEXT: v_mov_b32_e32 v0, 0 6605; CI-NEXT: v_mov_b32_e32 v1, 0 6606; CI-NEXT: v_mov_b32_e32 v2, 0 6607; CI-NEXT: v_mov_b32_e32 v3, 0 6608; CI-NEXT: v_mov_b32_e32 v4, 0 6609; CI-NEXT: v_mov_b32_e32 v5, 1 6610; CI-NEXT: v_mov_b32_e32 v6, 1 6611; CI-NEXT: v_mov_b32_e32 v7, 1 6612; CI-NEXT: v_mov_b32_e32 v8, 1 6613; CI-NEXT: v_mov_b32_e32 v9, 1 6614; CI-NEXT: v_mov_b32_e32 v10, 2 6615; CI-NEXT: v_mov_b32_e32 v11, 2 6616; CI-NEXT: v_mov_b32_e32 v12, 2 6617; CI-NEXT: v_mov_b32_e32 v13, 2 6618; CI-NEXT: v_mov_b32_e32 v14, 2 6619; CI-NEXT: v_mov_b32_e32 v15, 3 6620; CI-NEXT: v_mov_b32_e32 v16, 3 6621; CI-NEXT: v_mov_b32_e32 v17, 3 6622; CI-NEXT: v_mov_b32_e32 v18, 3 6623; CI-NEXT: v_mov_b32_e32 v19, 3 6624; CI-NEXT: v_mov_b32_e32 v20, 4 6625; CI-NEXT: v_mov_b32_e32 v21, 4 6626; CI-NEXT: v_mov_b32_e32 v22, 4 6627; CI-NEXT: v_mov_b32_e32 v23, 4 6628; CI-NEXT: v_mov_b32_e32 v24, 4 6629; CI-NEXT: v_mov_b32_e32 v25, 5 6630; CI-NEXT: v_mov_b32_e32 v26, 5 6631; CI-NEXT: v_mov_b32_e32 v27, 5 6632; CI-NEXT: v_mov_b32_e32 v28, 5 6633; CI-NEXT: v_mov_b32_e32 v29, 5 6634; CI-NEXT: v_mov_b32_e32 v30, 6 6635; CI-NEXT: v_writelane_b32 v40, s31, 1 6636; CI-NEXT: s_getpc_b64 s[4:5] 6637; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 6638; CI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12 6639; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 6640; CI-NEXT: v_readlane_b32 s31, v40, 1 6641; CI-NEXT: v_readlane_b32 s30, v40, 0 6642; CI-NEXT: s_mov_b32 s32, s33 6643; CI-NEXT: v_readlane_b32 s4, v40, 2 6644; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 6645; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 6646; CI-NEXT: s_mov_b64 exec, s[6:7] 6647; CI-NEXT: s_mov_b32 s33, s4 6648; CI-NEXT: s_waitcnt vmcnt(0) 6649; CI-NEXT: s_setpc_b64 s[30:31] 6650; 6651; GFX9-LABEL: stack_8xv5i32: 6652; GFX9: ; %bb.0: ; %entry 6653; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6654; GFX9-NEXT: s_mov_b32 s4, s33 6655; GFX9-NEXT: s_mov_b32 s33, s32 6656; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 6657; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 6658; GFX9-NEXT: s_mov_b64 exec, s[8:9] 6659; GFX9-NEXT: s_addk_i32 s32, 0x400 6660; GFX9-NEXT: v_mov_b32_e32 v0, 7 6661; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 6662; GFX9-NEXT: v_mov_b32_e32 v0, 8 6663; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 6664; GFX9-NEXT: v_mov_b32_e32 v0, 9 6665; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 6666; GFX9-NEXT: v_mov_b32_e32 v0, 10 6667; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 6668; GFX9-NEXT: v_mov_b32_e32 v0, 11 6669; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 6670; GFX9-NEXT: v_mov_b32_e32 v0, 12 6671; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 6672; GFX9-NEXT: v_mov_b32_e32 v0, 13 6673; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 6674; GFX9-NEXT: v_mov_b32_e32 v0, 14 6675; GFX9-NEXT: v_writelane_b32 v40, s4, 2 6676; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 6677; GFX9-NEXT: v_mov_b32_e32 v0, 15 6678; GFX9-NEXT: v_writelane_b32 v40, s30, 0 6679; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 6680; GFX9-NEXT: v_mov_b32_e32 v0, 0 6681; GFX9-NEXT: v_mov_b32_e32 v1, 0 6682; GFX9-NEXT: v_mov_b32_e32 v2, 0 6683; GFX9-NEXT: v_mov_b32_e32 v3, 0 6684; GFX9-NEXT: v_mov_b32_e32 v4, 0 6685; GFX9-NEXT: v_mov_b32_e32 v5, 1 6686; GFX9-NEXT: v_mov_b32_e32 v6, 1 6687; GFX9-NEXT: v_mov_b32_e32 v7, 1 6688; GFX9-NEXT: v_mov_b32_e32 v8, 1 6689; GFX9-NEXT: v_mov_b32_e32 v9, 1 6690; GFX9-NEXT: v_mov_b32_e32 v10, 2 6691; GFX9-NEXT: v_mov_b32_e32 v11, 2 6692; GFX9-NEXT: v_mov_b32_e32 v12, 2 6693; GFX9-NEXT: v_mov_b32_e32 v13, 2 6694; GFX9-NEXT: v_mov_b32_e32 v14, 2 6695; GFX9-NEXT: v_mov_b32_e32 v15, 3 6696; GFX9-NEXT: v_mov_b32_e32 v16, 3 6697; GFX9-NEXT: v_mov_b32_e32 v17, 3 6698; GFX9-NEXT: v_mov_b32_e32 v18, 3 6699; GFX9-NEXT: v_mov_b32_e32 v19, 3 6700; GFX9-NEXT: v_mov_b32_e32 v20, 4 6701; GFX9-NEXT: v_mov_b32_e32 v21, 4 6702; GFX9-NEXT: v_mov_b32_e32 v22, 4 6703; GFX9-NEXT: v_mov_b32_e32 v23, 4 6704; GFX9-NEXT: v_mov_b32_e32 v24, 4 6705; GFX9-NEXT: v_mov_b32_e32 v25, 5 6706; GFX9-NEXT: v_mov_b32_e32 v26, 5 6707; GFX9-NEXT: v_mov_b32_e32 v27, 5 6708; GFX9-NEXT: v_mov_b32_e32 v28, 5 6709; GFX9-NEXT: v_mov_b32_e32 v29, 5 6710; GFX9-NEXT: v_mov_b32_e32 v30, 6 6711; GFX9-NEXT: v_writelane_b32 v40, s31, 1 6712; GFX9-NEXT: s_getpc_b64 s[4:5] 6713; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 6714; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12 6715; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 6716; GFX9-NEXT: v_readlane_b32 s31, v40, 1 6717; GFX9-NEXT: v_readlane_b32 s30, v40, 0 6718; GFX9-NEXT: s_mov_b32 s32, s33 6719; GFX9-NEXT: v_readlane_b32 s4, v40, 2 6720; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 6721; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 6722; GFX9-NEXT: s_mov_b64 exec, s[6:7] 6723; GFX9-NEXT: s_mov_b32 s33, s4 6724; GFX9-NEXT: s_waitcnt vmcnt(0) 6725; GFX9-NEXT: s_setpc_b64 s[30:31] 6726; 6727; GFX11-LABEL: stack_8xv5i32: 6728; GFX11: ; %bb.0: ; %entry 6729; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6730; GFX11-NEXT: s_mov_b32 s0, s33 6731; GFX11-NEXT: s_mov_b32 s33, s32 6732; GFX11-NEXT: s_or_saveexec_b32 s1, -1 6733; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 6734; GFX11-NEXT: s_mov_b32 exec_lo, s1 6735; GFX11-NEXT: v_writelane_b32 v40, s0, 2 6736; GFX11-NEXT: v_dual_mov_b32 v0, 7 :: v_dual_mov_b32 v1, 8 6737; GFX11-NEXT: v_dual_mov_b32 v2, 9 :: v_dual_mov_b32 v3, 10 6738; GFX11-NEXT: v_dual_mov_b32 v8, 15 :: v_dual_mov_b32 v5, 12 6739; GFX11-NEXT: s_add_i32 s32, s32, 16 6740; GFX11-NEXT: v_dual_mov_b32 v4, 11 :: v_dual_mov_b32 v7, 14 6741; GFX11-NEXT: v_mov_b32_e32 v6, 13 6742; GFX11-NEXT: s_add_i32 s0, s32, 32 6743; GFX11-NEXT: s_add_i32 s1, s32, 16 6744; GFX11-NEXT: v_writelane_b32 v40, s30, 0 6745; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 6746; GFX11-NEXT: v_mov_b32_e32 v1, 0 6747; GFX11-NEXT: scratch_store_b32 off, v8, s0 6748; GFX11-NEXT: scratch_store_b128 off, v[4:7], s1 6749; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, 0 6750; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 1 6751; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v7, 1 6752; GFX11-NEXT: v_dual_mov_b32 v6, 1 :: v_dual_mov_b32 v9, 1 6753; GFX11-NEXT: v_dual_mov_b32 v8, 1 :: v_dual_mov_b32 v11, 2 6754; GFX11-NEXT: v_dual_mov_b32 v10, 2 :: v_dual_mov_b32 v13, 2 6755; GFX11-NEXT: v_dual_mov_b32 v12, 2 :: v_dual_mov_b32 v15, 3 6756; GFX11-NEXT: v_dual_mov_b32 v14, 2 :: v_dual_mov_b32 v17, 3 6757; GFX11-NEXT: v_dual_mov_b32 v16, 3 :: v_dual_mov_b32 v19, 3 6758; GFX11-NEXT: v_dual_mov_b32 v18, 3 :: v_dual_mov_b32 v21, 4 6759; GFX11-NEXT: v_dual_mov_b32 v20, 4 :: v_dual_mov_b32 v23, 4 6760; GFX11-NEXT: v_dual_mov_b32 v22, 4 :: v_dual_mov_b32 v25, 5 6761; GFX11-NEXT: v_dual_mov_b32 v24, 4 :: v_dual_mov_b32 v27, 5 6762; GFX11-NEXT: v_dual_mov_b32 v26, 5 :: v_dual_mov_b32 v29, 5 6763; GFX11-NEXT: v_mov_b32_e32 v28, 5 6764; GFX11-NEXT: v_mov_b32_e32 v30, 6 6765; GFX11-NEXT: v_writelane_b32 v40, s31, 1 6766; GFX11-NEXT: s_getpc_b64 s[0:1] 6767; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 6768; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 6769; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6770; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 6771; GFX11-NEXT: v_readlane_b32 s31, v40, 1 6772; GFX11-NEXT: v_readlane_b32 s30, v40, 0 6773; GFX11-NEXT: s_mov_b32 s32, s33 6774; GFX11-NEXT: v_readlane_b32 s0, v40, 2 6775; GFX11-NEXT: s_or_saveexec_b32 s1, -1 6776; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 6777; GFX11-NEXT: s_mov_b32 exec_lo, s1 6778; GFX11-NEXT: s_mov_b32 s33, s0 6779; GFX11-NEXT: s_waitcnt vmcnt(0) 6780; GFX11-NEXT: s_setpc_b64 s[30:31] 6781; 6782; HSA-LABEL: stack_8xv5i32: 6783; HSA: ; %bb.0: ; %entry 6784; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6785; HSA-NEXT: s_mov_b32 s4, s33 6786; HSA-NEXT: s_mov_b32 s33, s32 6787; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 6788; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 6789; HSA-NEXT: s_mov_b64 exec, s[8:9] 6790; HSA-NEXT: s_addk_i32 s32, 0x400 6791; HSA-NEXT: v_mov_b32_e32 v0, 7 6792; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 6793; HSA-NEXT: v_mov_b32_e32 v0, 8 6794; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 6795; HSA-NEXT: v_mov_b32_e32 v0, 9 6796; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 6797; HSA-NEXT: v_mov_b32_e32 v0, 10 6798; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 6799; HSA-NEXT: v_mov_b32_e32 v0, 11 6800; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 6801; HSA-NEXT: v_mov_b32_e32 v0, 12 6802; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 6803; HSA-NEXT: v_mov_b32_e32 v0, 13 6804; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 6805; HSA-NEXT: v_mov_b32_e32 v0, 14 6806; HSA-NEXT: v_writelane_b32 v40, s4, 2 6807; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 6808; HSA-NEXT: v_mov_b32_e32 v0, 15 6809; HSA-NEXT: v_writelane_b32 v40, s30, 0 6810; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 6811; HSA-NEXT: v_mov_b32_e32 v0, 0 6812; HSA-NEXT: v_mov_b32_e32 v1, 0 6813; HSA-NEXT: v_mov_b32_e32 v2, 0 6814; HSA-NEXT: v_mov_b32_e32 v3, 0 6815; HSA-NEXT: v_mov_b32_e32 v4, 0 6816; HSA-NEXT: v_mov_b32_e32 v5, 1 6817; HSA-NEXT: v_mov_b32_e32 v6, 1 6818; HSA-NEXT: v_mov_b32_e32 v7, 1 6819; HSA-NEXT: v_mov_b32_e32 v8, 1 6820; HSA-NEXT: v_mov_b32_e32 v9, 1 6821; HSA-NEXT: v_mov_b32_e32 v10, 2 6822; HSA-NEXT: v_mov_b32_e32 v11, 2 6823; HSA-NEXT: v_mov_b32_e32 v12, 2 6824; HSA-NEXT: v_mov_b32_e32 v13, 2 6825; HSA-NEXT: v_mov_b32_e32 v14, 2 6826; HSA-NEXT: v_mov_b32_e32 v15, 3 6827; HSA-NEXT: v_mov_b32_e32 v16, 3 6828; HSA-NEXT: v_mov_b32_e32 v17, 3 6829; HSA-NEXT: v_mov_b32_e32 v18, 3 6830; HSA-NEXT: v_mov_b32_e32 v19, 3 6831; HSA-NEXT: v_mov_b32_e32 v20, 4 6832; HSA-NEXT: v_mov_b32_e32 v21, 4 6833; HSA-NEXT: v_mov_b32_e32 v22, 4 6834; HSA-NEXT: v_mov_b32_e32 v23, 4 6835; HSA-NEXT: v_mov_b32_e32 v24, 4 6836; HSA-NEXT: v_mov_b32_e32 v25, 5 6837; HSA-NEXT: v_mov_b32_e32 v26, 5 6838; HSA-NEXT: v_mov_b32_e32 v27, 5 6839; HSA-NEXT: v_mov_b32_e32 v28, 5 6840; HSA-NEXT: v_mov_b32_e32 v29, 5 6841; HSA-NEXT: v_mov_b32_e32 v30, 6 6842; HSA-NEXT: v_writelane_b32 v40, s31, 1 6843; HSA-NEXT: s_getpc_b64 s[4:5] 6844; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4 6845; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12 6846; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] 6847; HSA-NEXT: v_readlane_b32 s31, v40, 1 6848; HSA-NEXT: v_readlane_b32 s30, v40, 0 6849; HSA-NEXT: s_mov_b32 s32, s33 6850; HSA-NEXT: v_readlane_b32 s4, v40, 2 6851; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 6852; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 6853; HSA-NEXT: s_mov_b64 exec, s[6:7] 6854; HSA-NEXT: s_mov_b32 s33, s4 6855; HSA-NEXT: s_waitcnt vmcnt(0) 6856; HSA-NEXT: s_setpc_b64 s[30:31] 6857entry: 6858 call void @external_void_func_8xv5i32( 6859 <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>, 6860 <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>, 6861 <5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>, 6862 <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>, 6863 <5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>, 6864 <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>, 6865 <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>, 6866 <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>) 6867 ret void 6868} 6869 6870define void @stack_8xv5f32() #0 { 6871; VI-LABEL: stack_8xv5f32: 6872; VI: ; %bb.0: ; %entry 6873; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6874; VI-NEXT: s_mov_b32 s4, s33 6875; VI-NEXT: s_mov_b32 s33, s32 6876; VI-NEXT: s_or_saveexec_b64 s[8:9], -1 6877; VI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 6878; VI-NEXT: s_mov_b64 exec, s[8:9] 6879; VI-NEXT: s_addk_i32 s32, 0x400 6880; VI-NEXT: v_mov_b32_e32 v0, 0x40e00000 6881; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 6882; VI-NEXT: v_mov_b32_e32 v0, 0x41000000 6883; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 6884; VI-NEXT: v_mov_b32_e32 v0, 0x41100000 6885; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 6886; VI-NEXT: v_mov_b32_e32 v0, 0x41200000 6887; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 6888; VI-NEXT: v_mov_b32_e32 v0, 0x41300000 6889; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 6890; VI-NEXT: v_mov_b32_e32 v0, 0x41400000 6891; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 6892; VI-NEXT: v_mov_b32_e32 v0, 0x41500000 6893; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 6894; VI-NEXT: v_mov_b32_e32 v0, 0x41600000 6895; VI-NEXT: v_writelane_b32 v40, s4, 2 6896; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 6897; VI-NEXT: v_mov_b32_e32 v0, 0x41700000 6898; VI-NEXT: v_writelane_b32 v40, s30, 0 6899; VI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 6900; VI-NEXT: v_mov_b32_e32 v0, 0 6901; VI-NEXT: v_mov_b32_e32 v1, 0 6902; VI-NEXT: v_mov_b32_e32 v2, 0 6903; VI-NEXT: v_mov_b32_e32 v3, 0 6904; VI-NEXT: v_mov_b32_e32 v4, 0 6905; VI-NEXT: v_mov_b32_e32 v5, 1.0 6906; VI-NEXT: v_mov_b32_e32 v6, 1.0 6907; VI-NEXT: v_mov_b32_e32 v7, 1.0 6908; VI-NEXT: v_mov_b32_e32 v8, 1.0 6909; VI-NEXT: v_mov_b32_e32 v9, 1.0 6910; VI-NEXT: v_mov_b32_e32 v10, 2.0 6911; VI-NEXT: v_mov_b32_e32 v11, 2.0 6912; VI-NEXT: v_mov_b32_e32 v12, 2.0 6913; VI-NEXT: v_mov_b32_e32 v13, 2.0 6914; VI-NEXT: v_mov_b32_e32 v14, 2.0 6915; VI-NEXT: v_mov_b32_e32 v15, 0x40400000 6916; VI-NEXT: v_mov_b32_e32 v16, 0x40400000 6917; VI-NEXT: v_mov_b32_e32 v17, 0x40400000 6918; VI-NEXT: v_mov_b32_e32 v18, 0x40400000 6919; VI-NEXT: v_mov_b32_e32 v19, 0x40400000 6920; VI-NEXT: v_mov_b32_e32 v20, 4.0 6921; VI-NEXT: v_mov_b32_e32 v21, 4.0 6922; VI-NEXT: v_mov_b32_e32 v22, 4.0 6923; VI-NEXT: v_mov_b32_e32 v23, 4.0 6924; VI-NEXT: v_mov_b32_e32 v24, 4.0 6925; VI-NEXT: v_mov_b32_e32 v25, 0x40a00000 6926; VI-NEXT: v_mov_b32_e32 v26, 0x40a00000 6927; VI-NEXT: v_mov_b32_e32 v27, 0x40a00000 6928; VI-NEXT: v_mov_b32_e32 v28, 0x40a00000 6929; VI-NEXT: v_mov_b32_e32 v29, 0x40a00000 6930; VI-NEXT: v_mov_b32_e32 v30, 0x40c00000 6931; VI-NEXT: v_writelane_b32 v40, s31, 1 6932; VI-NEXT: s_getpc_b64 s[4:5] 6933; VI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 6934; VI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12 6935; VI-NEXT: s_swappc_b64 s[30:31], s[4:5] 6936; VI-NEXT: v_readlane_b32 s31, v40, 1 6937; VI-NEXT: v_readlane_b32 s30, v40, 0 6938; VI-NEXT: s_mov_b32 s32, s33 6939; VI-NEXT: v_readlane_b32 s4, v40, 2 6940; VI-NEXT: s_or_saveexec_b64 s[6:7], -1 6941; VI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 6942; VI-NEXT: s_mov_b64 exec, s[6:7] 6943; VI-NEXT: s_mov_b32 s33, s4 6944; VI-NEXT: s_waitcnt vmcnt(0) 6945; VI-NEXT: s_setpc_b64 s[30:31] 6946; 6947; CI-LABEL: stack_8xv5f32: 6948; CI: ; %bb.0: ; %entry 6949; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6950; CI-NEXT: s_mov_b32 s4, s33 6951; CI-NEXT: s_mov_b32 s33, s32 6952; CI-NEXT: s_or_saveexec_b64 s[8:9], -1 6953; CI-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 6954; CI-NEXT: s_mov_b64 exec, s[8:9] 6955; CI-NEXT: s_addk_i32 s32, 0x400 6956; CI-NEXT: v_mov_b32_e32 v0, 0x40e00000 6957; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 6958; CI-NEXT: v_mov_b32_e32 v0, 0x41000000 6959; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 6960; CI-NEXT: v_mov_b32_e32 v0, 0x41100000 6961; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 6962; CI-NEXT: v_mov_b32_e32 v0, 0x41200000 6963; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 6964; CI-NEXT: v_mov_b32_e32 v0, 0x41300000 6965; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 6966; CI-NEXT: v_mov_b32_e32 v0, 0x41400000 6967; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 6968; CI-NEXT: v_mov_b32_e32 v0, 0x41500000 6969; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 6970; CI-NEXT: v_mov_b32_e32 v0, 0x41600000 6971; CI-NEXT: v_writelane_b32 v40, s4, 2 6972; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 6973; CI-NEXT: v_mov_b32_e32 v0, 0x41700000 6974; CI-NEXT: v_writelane_b32 v40, s30, 0 6975; CI-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 6976; CI-NEXT: v_mov_b32_e32 v0, 0 6977; CI-NEXT: v_mov_b32_e32 v1, 0 6978; CI-NEXT: v_mov_b32_e32 v2, 0 6979; CI-NEXT: v_mov_b32_e32 v3, 0 6980; CI-NEXT: v_mov_b32_e32 v4, 0 6981; CI-NEXT: v_mov_b32_e32 v5, 1.0 6982; CI-NEXT: v_mov_b32_e32 v6, 1.0 6983; CI-NEXT: v_mov_b32_e32 v7, 1.0 6984; CI-NEXT: v_mov_b32_e32 v8, 1.0 6985; CI-NEXT: v_mov_b32_e32 v9, 1.0 6986; CI-NEXT: v_mov_b32_e32 v10, 2.0 6987; CI-NEXT: v_mov_b32_e32 v11, 2.0 6988; CI-NEXT: v_mov_b32_e32 v12, 2.0 6989; CI-NEXT: v_mov_b32_e32 v13, 2.0 6990; CI-NEXT: v_mov_b32_e32 v14, 2.0 6991; CI-NEXT: v_mov_b32_e32 v15, 0x40400000 6992; CI-NEXT: v_mov_b32_e32 v16, 0x40400000 6993; CI-NEXT: v_mov_b32_e32 v17, 0x40400000 6994; CI-NEXT: v_mov_b32_e32 v18, 0x40400000 6995; CI-NEXT: v_mov_b32_e32 v19, 0x40400000 6996; CI-NEXT: v_mov_b32_e32 v20, 4.0 6997; CI-NEXT: v_mov_b32_e32 v21, 4.0 6998; CI-NEXT: v_mov_b32_e32 v22, 4.0 6999; CI-NEXT: v_mov_b32_e32 v23, 4.0 7000; CI-NEXT: v_mov_b32_e32 v24, 4.0 7001; CI-NEXT: v_mov_b32_e32 v25, 0x40a00000 7002; CI-NEXT: v_mov_b32_e32 v26, 0x40a00000 7003; CI-NEXT: v_mov_b32_e32 v27, 0x40a00000 7004; CI-NEXT: v_mov_b32_e32 v28, 0x40a00000 7005; CI-NEXT: v_mov_b32_e32 v29, 0x40a00000 7006; CI-NEXT: v_mov_b32_e32 v30, 0x40c00000 7007; CI-NEXT: v_writelane_b32 v40, s31, 1 7008; CI-NEXT: s_getpc_b64 s[4:5] 7009; CI-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 7010; CI-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12 7011; CI-NEXT: s_swappc_b64 s[30:31], s[4:5] 7012; CI-NEXT: v_readlane_b32 s31, v40, 1 7013; CI-NEXT: v_readlane_b32 s30, v40, 0 7014; CI-NEXT: s_mov_b32 s32, s33 7015; CI-NEXT: v_readlane_b32 s4, v40, 2 7016; CI-NEXT: s_or_saveexec_b64 s[6:7], -1 7017; CI-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 7018; CI-NEXT: s_mov_b64 exec, s[6:7] 7019; CI-NEXT: s_mov_b32 s33, s4 7020; CI-NEXT: s_waitcnt vmcnt(0) 7021; CI-NEXT: s_setpc_b64 s[30:31] 7022; 7023; GFX9-LABEL: stack_8xv5f32: 7024; GFX9: ; %bb.0: ; %entry 7025; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7026; GFX9-NEXT: s_mov_b32 s4, s33 7027; GFX9-NEXT: s_mov_b32 s33, s32 7028; GFX9-NEXT: s_or_saveexec_b64 s[8:9], -1 7029; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 7030; GFX9-NEXT: s_mov_b64 exec, s[8:9] 7031; GFX9-NEXT: s_addk_i32 s32, 0x400 7032; GFX9-NEXT: v_mov_b32_e32 v0, 0x40e00000 7033; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 7034; GFX9-NEXT: v_mov_b32_e32 v0, 0x41000000 7035; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 7036; GFX9-NEXT: v_mov_b32_e32 v0, 0x41100000 7037; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 7038; GFX9-NEXT: v_mov_b32_e32 v0, 0x41200000 7039; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 7040; GFX9-NEXT: v_mov_b32_e32 v0, 0x41300000 7041; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 7042; GFX9-NEXT: v_mov_b32_e32 v0, 0x41400000 7043; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 7044; GFX9-NEXT: v_mov_b32_e32 v0, 0x41500000 7045; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 7046; GFX9-NEXT: v_mov_b32_e32 v0, 0x41600000 7047; GFX9-NEXT: v_writelane_b32 v40, s4, 2 7048; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 7049; GFX9-NEXT: v_mov_b32_e32 v0, 0x41700000 7050; GFX9-NEXT: v_writelane_b32 v40, s30, 0 7051; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 7052; GFX9-NEXT: v_mov_b32_e32 v0, 0 7053; GFX9-NEXT: v_mov_b32_e32 v1, 0 7054; GFX9-NEXT: v_mov_b32_e32 v2, 0 7055; GFX9-NEXT: v_mov_b32_e32 v3, 0 7056; GFX9-NEXT: v_mov_b32_e32 v4, 0 7057; GFX9-NEXT: v_mov_b32_e32 v5, 1.0 7058; GFX9-NEXT: v_mov_b32_e32 v6, 1.0 7059; GFX9-NEXT: v_mov_b32_e32 v7, 1.0 7060; GFX9-NEXT: v_mov_b32_e32 v8, 1.0 7061; GFX9-NEXT: v_mov_b32_e32 v9, 1.0 7062; GFX9-NEXT: v_mov_b32_e32 v10, 2.0 7063; GFX9-NEXT: v_mov_b32_e32 v11, 2.0 7064; GFX9-NEXT: v_mov_b32_e32 v12, 2.0 7065; GFX9-NEXT: v_mov_b32_e32 v13, 2.0 7066; GFX9-NEXT: v_mov_b32_e32 v14, 2.0 7067; GFX9-NEXT: v_mov_b32_e32 v15, 0x40400000 7068; GFX9-NEXT: v_mov_b32_e32 v16, 0x40400000 7069; GFX9-NEXT: v_mov_b32_e32 v17, 0x40400000 7070; GFX9-NEXT: v_mov_b32_e32 v18, 0x40400000 7071; GFX9-NEXT: v_mov_b32_e32 v19, 0x40400000 7072; GFX9-NEXT: v_mov_b32_e32 v20, 4.0 7073; GFX9-NEXT: v_mov_b32_e32 v21, 4.0 7074; GFX9-NEXT: v_mov_b32_e32 v22, 4.0 7075; GFX9-NEXT: v_mov_b32_e32 v23, 4.0 7076; GFX9-NEXT: v_mov_b32_e32 v24, 4.0 7077; GFX9-NEXT: v_mov_b32_e32 v25, 0x40a00000 7078; GFX9-NEXT: v_mov_b32_e32 v26, 0x40a00000 7079; GFX9-NEXT: v_mov_b32_e32 v27, 0x40a00000 7080; GFX9-NEXT: v_mov_b32_e32 v28, 0x40a00000 7081; GFX9-NEXT: v_mov_b32_e32 v29, 0x40a00000 7082; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000 7083; GFX9-NEXT: v_writelane_b32 v40, s31, 1 7084; GFX9-NEXT: s_getpc_b64 s[4:5] 7085; GFX9-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 7086; GFX9-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12 7087; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] 7088; GFX9-NEXT: v_readlane_b32 s31, v40, 1 7089; GFX9-NEXT: v_readlane_b32 s30, v40, 0 7090; GFX9-NEXT: s_mov_b32 s32, s33 7091; GFX9-NEXT: v_readlane_b32 s4, v40, 2 7092; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 7093; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 7094; GFX9-NEXT: s_mov_b64 exec, s[6:7] 7095; GFX9-NEXT: s_mov_b32 s33, s4 7096; GFX9-NEXT: s_waitcnt vmcnt(0) 7097; GFX9-NEXT: s_setpc_b64 s[30:31] 7098; 7099; GFX11-LABEL: stack_8xv5f32: 7100; GFX11: ; %bb.0: ; %entry 7101; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7102; GFX11-NEXT: s_mov_b32 s0, s33 7103; GFX11-NEXT: s_mov_b32 s33, s32 7104; GFX11-NEXT: s_or_saveexec_b32 s1, -1 7105; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill 7106; GFX11-NEXT: s_mov_b32 exec_lo, s1 7107; GFX11-NEXT: v_writelane_b32 v40, s0, 2 7108; GFX11-NEXT: v_mov_b32_e32 v0, 0x40e00000 7109; GFX11-NEXT: v_mov_b32_e32 v1, 0x41000000 7110; GFX11-NEXT: v_mov_b32_e32 v2, 0x41100000 7111; GFX11-NEXT: v_mov_b32_e32 v3, 0x41200000 7112; GFX11-NEXT: v_mov_b32_e32 v8, 0x41700000 7113; GFX11-NEXT: s_add_i32 s32, s32, 16 7114; GFX11-NEXT: v_mov_b32_e32 v4, 0x41300000 7115; GFX11-NEXT: v_mov_b32_e32 v5, 0x41400000 7116; GFX11-NEXT: v_dual_mov_b32 v6, 0x41500000 :: v_dual_mov_b32 v9, 1.0 7117; GFX11-NEXT: v_mov_b32_e32 v7, 0x41600000 7118; GFX11-NEXT: s_add_i32 s0, s32, 32 7119; GFX11-NEXT: s_add_i32 s1, s32, 16 7120; GFX11-NEXT: v_writelane_b32 v40, s30, 0 7121; GFX11-NEXT: scratch_store_b128 off, v[0:3], s32 7122; GFX11-NEXT: scratch_store_b32 off, v8, s0 7123; GFX11-NEXT: scratch_store_b128 off, v[4:7], s1 7124; GFX11-NEXT: v_mov_b32_e32 v6, 1.0 7125; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0 7126; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0 7127; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1.0 7128; GFX11-NEXT: v_dual_mov_b32 v7, 1.0 :: v_dual_mov_b32 v8, 1.0 7129; GFX11-NEXT: v_dual_mov_b32 v11, 2.0 :: v_dual_mov_b32 v10, 2.0 7130; GFX11-NEXT: v_dual_mov_b32 v13, 2.0 :: v_dual_mov_b32 v12, 2.0 7131; GFX11-NEXT: v_dual_mov_b32 v15, 0x40400000 :: v_dual_mov_b32 v14, 2.0 7132; GFX11-NEXT: v_dual_mov_b32 v17, 0x40400000 :: v_dual_mov_b32 v16, 0x40400000 7133; GFX11-NEXT: v_dual_mov_b32 v19, 0x40400000 :: v_dual_mov_b32 v18, 0x40400000 7134; GFX11-NEXT: v_dual_mov_b32 v21, 4.0 :: v_dual_mov_b32 v20, 4.0 7135; GFX11-NEXT: v_dual_mov_b32 v23, 4.0 :: v_dual_mov_b32 v22, 4.0 7136; GFX11-NEXT: v_dual_mov_b32 v25, 0x40a00000 :: v_dual_mov_b32 v24, 4.0 7137; GFX11-NEXT: v_dual_mov_b32 v27, 0x40a00000 :: v_dual_mov_b32 v26, 0x40a00000 7138; GFX11-NEXT: v_dual_mov_b32 v29, 0x40a00000 :: v_dual_mov_b32 v28, 0x40a00000 7139; GFX11-NEXT: v_mov_b32_e32 v30, 0x40c00000 7140; GFX11-NEXT: v_writelane_b32 v40, s31, 1 7141; GFX11-NEXT: s_getpc_b64 s[0:1] 7142; GFX11-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 7143; GFX11-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 7144; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 7145; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 7146; GFX11-NEXT: v_readlane_b32 s31, v40, 1 7147; GFX11-NEXT: v_readlane_b32 s30, v40, 0 7148; GFX11-NEXT: s_mov_b32 s32, s33 7149; GFX11-NEXT: v_readlane_b32 s0, v40, 2 7150; GFX11-NEXT: s_or_saveexec_b32 s1, -1 7151; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload 7152; GFX11-NEXT: s_mov_b32 exec_lo, s1 7153; GFX11-NEXT: s_mov_b32 s33, s0 7154; GFX11-NEXT: s_waitcnt vmcnt(0) 7155; GFX11-NEXT: s_setpc_b64 s[30:31] 7156; 7157; HSA-LABEL: stack_8xv5f32: 7158; HSA: ; %bb.0: ; %entry 7159; HSA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7160; HSA-NEXT: s_mov_b32 s4, s33 7161; HSA-NEXT: s_mov_b32 s33, s32 7162; HSA-NEXT: s_or_saveexec_b64 s[8:9], -1 7163; HSA-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill 7164; HSA-NEXT: s_mov_b64 exec, s[8:9] 7165; HSA-NEXT: s_addk_i32 s32, 0x400 7166; HSA-NEXT: v_mov_b32_e32 v0, 0x40e00000 7167; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 7168; HSA-NEXT: v_mov_b32_e32 v0, 0x41000000 7169; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 7170; HSA-NEXT: v_mov_b32_e32 v0, 0x41100000 7171; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 7172; HSA-NEXT: v_mov_b32_e32 v0, 0x41200000 7173; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 7174; HSA-NEXT: v_mov_b32_e32 v0, 0x41300000 7175; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 7176; HSA-NEXT: v_mov_b32_e32 v0, 0x41400000 7177; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 7178; HSA-NEXT: v_mov_b32_e32 v0, 0x41500000 7179; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 7180; HSA-NEXT: v_mov_b32_e32 v0, 0x41600000 7181; HSA-NEXT: v_writelane_b32 v40, s4, 2 7182; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:28 7183; HSA-NEXT: v_mov_b32_e32 v0, 0x41700000 7184; HSA-NEXT: v_writelane_b32 v40, s30, 0 7185; HSA-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:32 7186; HSA-NEXT: v_mov_b32_e32 v0, 0 7187; HSA-NEXT: v_mov_b32_e32 v1, 0 7188; HSA-NEXT: v_mov_b32_e32 v2, 0 7189; HSA-NEXT: v_mov_b32_e32 v3, 0 7190; HSA-NEXT: v_mov_b32_e32 v4, 0 7191; HSA-NEXT: v_mov_b32_e32 v5, 1.0 7192; HSA-NEXT: v_mov_b32_e32 v6, 1.0 7193; HSA-NEXT: v_mov_b32_e32 v7, 1.0 7194; HSA-NEXT: v_mov_b32_e32 v8, 1.0 7195; HSA-NEXT: v_mov_b32_e32 v9, 1.0 7196; HSA-NEXT: v_mov_b32_e32 v10, 2.0 7197; HSA-NEXT: v_mov_b32_e32 v11, 2.0 7198; HSA-NEXT: v_mov_b32_e32 v12, 2.0 7199; HSA-NEXT: v_mov_b32_e32 v13, 2.0 7200; HSA-NEXT: v_mov_b32_e32 v14, 2.0 7201; HSA-NEXT: v_mov_b32_e32 v15, 0x40400000 7202; HSA-NEXT: v_mov_b32_e32 v16, 0x40400000 7203; HSA-NEXT: v_mov_b32_e32 v17, 0x40400000 7204; HSA-NEXT: v_mov_b32_e32 v18, 0x40400000 7205; HSA-NEXT: v_mov_b32_e32 v19, 0x40400000 7206; HSA-NEXT: v_mov_b32_e32 v20, 4.0 7207; HSA-NEXT: v_mov_b32_e32 v21, 4.0 7208; HSA-NEXT: v_mov_b32_e32 v22, 4.0 7209; HSA-NEXT: v_mov_b32_e32 v23, 4.0 7210; HSA-NEXT: v_mov_b32_e32 v24, 4.0 7211; HSA-NEXT: v_mov_b32_e32 v25, 0x40a00000 7212; HSA-NEXT: v_mov_b32_e32 v26, 0x40a00000 7213; HSA-NEXT: v_mov_b32_e32 v27, 0x40a00000 7214; HSA-NEXT: v_mov_b32_e32 v28, 0x40a00000 7215; HSA-NEXT: v_mov_b32_e32 v29, 0x40a00000 7216; HSA-NEXT: v_mov_b32_e32 v30, 0x40c00000 7217; HSA-NEXT: v_writelane_b32 v40, s31, 1 7218; HSA-NEXT: s_getpc_b64 s[4:5] 7219; HSA-NEXT: s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4 7220; HSA-NEXT: s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12 7221; HSA-NEXT: s_swappc_b64 s[30:31], s[4:5] 7222; HSA-NEXT: v_readlane_b32 s31, v40, 1 7223; HSA-NEXT: v_readlane_b32 s30, v40, 0 7224; HSA-NEXT: s_mov_b32 s32, s33 7225; HSA-NEXT: v_readlane_b32 s4, v40, 2 7226; HSA-NEXT: s_or_saveexec_b64 s[6:7], -1 7227; HSA-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload 7228; HSA-NEXT: s_mov_b64 exec, s[6:7] 7229; HSA-NEXT: s_mov_b32 s33, s4 7230; HSA-NEXT: s_waitcnt vmcnt(0) 7231; HSA-NEXT: s_setpc_b64 s[30:31] 7232entry: 7233 call void @external_void_func_8xv5f32( 7234 <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, 7235 <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, 7236 <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>, 7237 <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>, 7238 <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>, 7239 <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>, 7240 <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>, 7241 <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>) 7242 ret void 7243} 7244 7245declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0 7246declare hidden void @stack_passed_f64_arg(<32 x i32>, double) #0 7247declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, 7248 <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0 7249declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>, 7250 <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0 7251declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>, 7252 <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0 7253declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>, 7254 <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0 7255 7256attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } 7257attributes #1 = { nounwind readnone } 7258attributes #2 = { nounwind noinline } 7259 7260!llvm.module.flags = !{!0} 7261!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} 7262