; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s

; Checks which VGPRs are assigned to the address tuple operand of
; image_gather4_c_b_cl when the function also calls @extern_func, for gfx900,
; gfx1010 and gfx1100.

declare void @extern_func() #2

define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; The vgpr tuple8 operand in the image_gather4_c_b_cl instruction need not be
; preserved across the call and should get 8 scratch registers.
; The inline asm statements in the body clobber v0-v31, so the operand is
; copied out of the incoming argument registers before the image instruction.
; GFX9-LABEL: non_preserved_vgpr_tuple8:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s4, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    v_mov_b32_e32 v36, v16
; GFX9-NEXT:    v_mov_b32_e32 v35, v15
; GFX9-NEXT:    v_mov_b32_e32 v34, v14
; GFX9-NEXT:    v_mov_b32_e32 v33, v13
; GFX9-NEXT:    v_mov_b32_e32 v32, v12
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    image_gather4_c_b_cl v[40:43], v[32:36], s[4:11], s[4:7] dmask:0x1
; GFX9-NEXT:    s_addk_i32 s32, 0x800
; GFX9-NEXT:    v_writelane_b32 v44, s4, 2
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT:    v_writelane_b32 v44, s30, 0
; GFX9-NEXT:    v_writelane_b32 v44, s31, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    v_mov_b32_e32 v0, v40
; GFX9-NEXT:    v_mov_b32_e32 v1, v41
; GFX9-NEXT:    v_mov_b32_e32 v2, v42
; GFX9-NEXT:    v_mov_b32_e32 v3, v43
; GFX9-NEXT:    buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s31, v44, 1
; GFX9-NEXT:    v_readlane_b32 s30, v44, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v44, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: non_preserved_vgpr_tuple8:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s4, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_or_saveexec_b32 s5, -1
; GFX10-NEXT:    buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s5
; GFX10-NEXT:    v_mov_b32_e32 v36, v16
; GFX10-NEXT:    v_mov_b32_e32 v35, v15
; GFX10-NEXT:    v_mov_b32_e32 v34, v14
; GFX10-NEXT:    v_mov_b32_e32 v33, v13
; GFX10-NEXT:    v_mov_b32_e32 v32, v12
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    ;;#ASMSTART
; GFX10-NEXT:    ;;#ASMEND
; GFX10-NEXT:    image_gather4_c_b_cl v[40:43], v[32:36], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_addk_i32 s32, 0x400
; GFX10-NEXT:    v_writelane_b32 v44, s4, 2
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT:    v_writelane_b32 v44, s30, 0
; GFX10-NEXT:    v_writelane_b32 v44, s31, 1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    v_mov_b32_e32 v0, v40
; GFX10-NEXT:    v_mov_b32_e32 v1, v41
; GFX10-NEXT:    v_mov_b32_e32 v2, v42
; GFX10-NEXT:    v_mov_b32_e32 v3, v43
; GFX10-NEXT:    s_clause 0x3
; GFX10-NEXT:    buffer_load_dword v43, off, s[0:3], s33
; GFX10-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:4
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:8
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:12
; GFX10-NEXT:    v_readlane_b32 s31, v44, 1
; GFX10-NEXT:    v_readlane_b32 s30, v44, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    v_readlane_b32 s4, v44, 2
; GFX10-NEXT:    s_or_saveexec_b32 s5, -1
; GFX10-NEXT:    buffer_load_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s5
; GFX10-NEXT:    s_mov_b32 s33, s4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: non_preserved_vgpr_tuple8:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s0, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_store_b32 off, v44, s33 offset:16 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    v_dual_mov_b32 v36, v16 :: v_dual_mov_b32 v35, v15
; GFX11-NEXT:    v_dual_mov_b32 v34, v14 :: v_dual_mov_b32 v33, v13
; GFX11-NEXT:    v_mov_b32_e32 v32, v12
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 offset:12
; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:8
; GFX11-NEXT:    scratch_store_b32 off, v42, s33 offset:4
; GFX11-NEXT:    scratch_store_b32 off, v43, s33
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    ;;#ASMSTART
; GFX11-NEXT:    ;;#ASMEND
; GFX11-NEXT:    image_gather4_c_b_cl v[40:43], v[32:36], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX11-NEXT:    s_add_i32 s32, s32, 32
; GFX11-NEXT:    v_writelane_b32 v44, s0, 2
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_writelane_b32 v44, s30, 0
; GFX11-NEXT:    v_writelane_b32 v44, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    v_dual_mov_b32 v0, v40 :: v_dual_mov_b32 v1, v41
; GFX11-NEXT:    v_dual_mov_b32 v2, v42 :: v_dual_mov_b32 v3, v43
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    scratch_load_b32 v43, off, s33
; GFX11-NEXT:    scratch_load_b32 v42, off, s33 offset:4
; GFX11-NEXT:    scratch_load_b32 v41, off, s33 offset:8
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 offset:12
; GFX11-NEXT:    v_readlane_b32 s31, v44, 1
; GFX11-NEXT:    v_readlane_b32 s30, v44, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v44, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v44, off, s33 offset:16 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]

main_body:
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
  call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0
  call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0
  call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
  call void @extern_func()
  ret <4 x float> %v
}

define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; The vgpr tuple8 operand in the image_gather4_c_b_cl instruction needs to be
; preserved across the call and should get allocated to 8 CSRs.
; Only the lower 5 sub-registers of the tuple are preserved.
; The upper 3 sub-registers are unused.
; GFX9-LABEL: call_preserved_vgpr_tuple8:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s4, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT:    buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    v_mov_b32_e32 v44, v16
; GFX9-NEXT:    v_mov_b32_e32 v43, v15
; GFX9-NEXT:    v_mov_b32_e32 v42, v14
; GFX9-NEXT:    v_mov_b32_e32 v41, v13
; GFX9-NEXT:    v_mov_b32_e32 v40, v12
; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[40:44], s[4:11], s[4:7] dmask:0x1
; GFX9-NEXT:    s_addk_i32 s32, 0x800
; GFX9-NEXT:    v_writelane_b32 v45, s4, 2
; GFX9-NEXT:    s_getpc_b64 s[4:5]
; GFX9-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT:    v_writelane_b32 v45, s30, 0
; GFX9-NEXT:    v_writelane_b32 v45, s31, 1
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT:    image_gather4_c_b_cl v[0:3], v[40:44], s[4:11], s[4:7] dmask:0x1
; GFX9-NEXT:    s_nop 0
; GFX9-NEXT:    buffer_load_dword v44, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
; GFX9-NEXT:    v_readlane_b32 s31, v45, 1
; GFX9-NEXT:    v_readlane_b32 s30, v45, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v45, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: call_preserved_vgpr_tuple8:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_mov_b32 s4, s33
; GFX10-NEXT:    s_mov_b32 s33, s32
; GFX10-NEXT:    s_or_saveexec_b32 s5, -1
; GFX10-NEXT:    buffer_store_dword v45, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s5
; GFX10-NEXT:    buffer_store_dword v40, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT:    buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], v[12:16], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_addk_i32 s32, 0x400
; GFX10-NEXT:    v_writelane_b32 v45, s4, 2
; GFX10-NEXT:    s_getpc_b64 s[4:5]
; GFX10-NEXT:    s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX10-NEXT:    s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX10-NEXT:    v_mov_b32_e32 v40, v16
; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT:    v_mov_b32_e32 v41, v15
; GFX10-NEXT:    v_writelane_b32 v45, s30, 0
; GFX10-NEXT:    v_mov_b32_e32 v42, v14
; GFX10-NEXT:    v_mov_b32_e32 v43, v13
; GFX10-NEXT:    v_mov_b32_e32 v44, v12
; GFX10-NEXT:    v_writelane_b32 v45, s31, 1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT:    image_gather4_c_b_cl v[0:3], [v44, v43, v42, v41, v40], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_clause 0x4
; GFX10-NEXT:    buffer_load_dword v44, off, s[0:3], s33
; GFX10-NEXT:    buffer_load_dword v43, off, s[0:3], s33 offset:4
; GFX10-NEXT:    buffer_load_dword v42, off, s[0:3], s33 offset:8
; GFX10-NEXT:    buffer_load_dword v41, off, s[0:3], s33 offset:12
; GFX10-NEXT:    buffer_load_dword v40, off, s[0:3], s33 offset:16
; GFX10-NEXT:    v_readlane_b32 s31, v45, 1
; GFX10-NEXT:    v_readlane_b32 s30, v45, 0
; GFX10-NEXT:    s_mov_b32 s32, s33
; GFX10-NEXT:    v_readlane_b32 s4, v45, 2
; GFX10-NEXT:    s_or_saveexec_b32 s5, -1
; GFX10-NEXT:    buffer_load_dword v45, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; GFX10-NEXT:    s_mov_b32 exec_lo, s5
; GFX10-NEXT:    s_mov_b32 s33, s4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: call_preserved_vgpr_tuple8:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s0, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_store_b32 off, v45, s33 offset:20 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_clause 0x4
; GFX11-NEXT:    scratch_store_b32 off, v40, s33 offset:16
; GFX11-NEXT:    scratch_store_b32 off, v41, s33 offset:12
; GFX11-NEXT:    scratch_store_b32 off, v42, s33 offset:8
; GFX11-NEXT:    scratch_store_b32 off, v43, s33 offset:4
; GFX11-NEXT:    scratch_store_b32 off, v44, s33
; GFX11-NEXT:    image_gather4_c_b_cl v[0:3], v[12:16], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX11-NEXT:    s_add_i32 s32, s32, 32
; GFX11-NEXT:    v_writelane_b32 v45, s0, 2
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, extern_func@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, extern_func@gotpcrel32@hi+12
; GFX11-NEXT:    v_dual_mov_b32 v40, v16 :: v_dual_mov_b32 v41, v15
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_writelane_b32 v45, s30, 0
; GFX11-NEXT:    v_dual_mov_b32 v42, v14 :: v_dual_mov_b32 v43, v13
; GFX11-NEXT:    v_mov_b32_e32 v44, v12
; GFX11-NEXT:    v_writelane_b32 v45, s31, 1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    image_gather4_c_b_cl v[0:3], [v44, v43, v42, v41, v40], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX11-NEXT:    s_clause 0x4
; GFX11-NEXT:    scratch_load_b32 v44, off, s33
; GFX11-NEXT:    scratch_load_b32 v43, off, s33 offset:4
; GFX11-NEXT:    scratch_load_b32 v42, off, s33 offset:8
; GFX11-NEXT:    scratch_load_b32 v41, off, s33 offset:12
; GFX11-NEXT:    scratch_load_b32 v40, off, s33 offset:16
; GFX11-NEXT:    v_readlane_b32 s31, v45, 1
; GFX11-NEXT:    v_readlane_b32 s30, v45, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    v_readlane_b32 s0, v45, 2
; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
; GFX11-NEXT:    scratch_load_b32 v45, off, s33 offset:20 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s1
; GFX11-NEXT:    s_mov_b32 s33, s0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]

main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
  store <4 x float> %v, ptr addrspace(1) undef
  call void @extern_func()
  %v1 = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
  ret <4 x float> %v1
}

declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1

attributes #0 = { nounwind writeonly }
attributes #1 = { nounwind readonly }
attributes #2 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }