; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s

; Exercises the "max-memory-clause" scheduling strategy (attributes #2, together
; with "amdgpu-max-memory-cluster-dwords"="32") on gfx1100: the sixteen
; image_sample instructions are expected to be grouped into two s_clause 0x7
; bundles of eight, as checked below.

define amdgpu_ps void @group_image_sample(i32 inreg noundef %globalTable, i32 inreg noundef %userdata6, i32 inreg noundef %userdata7, i32 inreg noundef %userdata8, i32 inreg noundef %PrimMask, <2 x float> noundef %PerspInterpSample, <2 x float> noundef %PerspInterpCenter, <2 x float> noundef %PerspInterpCentroid) #2 {
; GFX11-LABEL: group_image_sample:
; GFX11: ; %bb.0: ; %.entry
; GFX11-NEXT: s_mov_b32 s33, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: s_mov_b32 m0, s4
; GFX11-NEXT: s_getpc_b64 s[4:5]
; GFX11-NEXT: s_mov_b32 s0, s1
; GFX11-NEXT: s_mov_b32 s6, s3
; GFX11-NEXT: s_mov_b32 s1, s5
; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s7, s5
; GFX11-NEXT: s_load_b128 s[12:15], s[0:1], 0x0
; GFX11-NEXT: s_load_b128 s[8:11], s[2:3], 0x0
; GFX11-NEXT: s_load_b256 s[0:7], s[6:7], 0x0
; GFX11-NEXT: s_mov_b32 s16, exec_lo
; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX11-NEXT: lds_param_load v2, attr0.y wait_vdst:15
; GFX11-NEXT: lds_param_load v3, attr0.x wait_vdst:15
; GFX11-NEXT: s_mov_b32 exec_lo, s16
; GFX11-NEXT: v_interp_p10_f32 v4, v2, v0, v2 wait_exp:1
; GFX11-NEXT: v_interp_p10_f32 v0, v3, v0, v3 wait_exp:0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0xf
; GFX11-NEXT: s_buffer_load_b64 s[16:17], s[12:15], 0x10
; GFX11-NEXT: s_buffer_load_b64 s[18:19], s[12:15], 0x20
; GFX11-NEXT: s_buffer_load_b64 s[20:21], s[12:15], 0x30
; GFX11-NEXT: s_buffer_load_b64 s[22:23], s[12:15], 0x40
; GFX11-NEXT: s_buffer_load_b64 s[24:25], s[12:15], 0x50
; GFX11-NEXT: s_buffer_load_b64 s[26:27], s[12:15], 0x60
; GFX11-NEXT: s_buffer_load_b64 s[28:29], s[12:15], 0x70
; GFX11-NEXT: s_buffer_load_b64 s[30:31], s[12:15], 0x80
; GFX11-NEXT: s_buffer_load_b64 s[34:35], s[12:15], 0x90
; GFX11-NEXT: s_buffer_load_b64 s[36:37], s[12:15], 0xa0
; GFX11-NEXT: s_buffer_load_b64 s[38:39], s[12:15], 0xb0
; GFX11-NEXT: s_buffer_load_b64 s[40:41], s[12:15], 0xc0
; GFX11-NEXT: s_buffer_load_b64 s[42:43], s[12:15], 0xd0
; GFX11-NEXT: s_buffer_load_b64 s[44:45], s[12:15], 0xe0
; GFX11-NEXT: s_buffer_load_b64 s[46:47], s[12:15], 0xf0
; GFX11-NEXT: s_buffer_load_b64 s[12:13], s[12:15], 0x100
; GFX11-NEXT: v_interp_p2_f32 v36, v2, v1, v4 wait_exp:7
; GFX11-NEXT: v_interp_p2_f32 v0, v3, v1, v0 wait_exp:7
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_add_f32_e32 v5, s17, v36
; GFX11-NEXT: v_add_f32_e32 v4, s16, v0
; GFX11-NEXT: v_add_f32_e32 v8, s18, v0
; GFX11-NEXT: v_add_f32_e32 v9, s19, v36
; GFX11-NEXT: v_add_f32_e32 v12, s20, v0
; GFX11-NEXT: v_add_f32_e32 v13, s21, v36
; GFX11-NEXT: v_add_f32_e32 v16, s22, v0
; GFX11-NEXT: v_add_f32_e32 v17, s23, v36
; GFX11-NEXT: v_add_f32_e32 v20, s24, v0
; GFX11-NEXT: v_add_f32_e32 v21, s25, v36
; GFX11-NEXT: v_add_f32_e32 v24, s26, v0
; GFX11-NEXT: v_add_f32_e32 v25, s27, v36
; GFX11-NEXT: v_add_f32_e32 v28, s28, v0
; GFX11-NEXT: v_add_f32_e32 v29, s29, v36
; GFX11-NEXT: v_add_f32_e32 v32, s30, v0
; GFX11-NEXT: v_add_f32_e32 v33, s31, v36
; GFX11-NEXT: s_clause 0x7
; GFX11-NEXT: image_sample v[4:7], v[4:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[8:11], v[8:9], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[12:15], v[12:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[16:19], v[16:17], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[20:23], v[20:21], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[24:27], v[24:25], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[28:31], v[28:29], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[32:35], v[32:33], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: v_add_f32_e32 v37, s34, v0
; GFX11-NEXT: v_add_f32_e32 v38, s35, v36
; GFX11-NEXT: v_add_f32_e32 v40, s36, v0
; GFX11-NEXT: v_add_f32_e32 v41, s37, v36
; GFX11-NEXT: v_add_f32_e32 v44, s38, v0
; GFX11-NEXT: v_add_f32_e32 v45, s39, v36
; GFX11-NEXT: v_add_f32_e32 v48, s40, v0
; GFX11-NEXT: v_add_f32_e32 v49, s41, v36
; GFX11-NEXT: v_add_f32_e32 v52, s42, v0
; GFX11-NEXT: v_add_f32_e32 v53, s43, v36
; GFX11-NEXT: v_add_f32_e32 v56, s44, v0
; GFX11-NEXT: v_add_f32_e32 v57, s45, v36
; GFX11-NEXT: v_add_f32_e32 v60, s46, v0
; GFX11-NEXT: v_add_f32_e32 v61, s47, v36
; GFX11-NEXT: v_add_f32_e32 v0, s12, v0
; GFX11-NEXT: v_add_f32_e32 v1, s13, v36
; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s33
; GFX11-NEXT: s_clause 0x7
; GFX11-NEXT: image_sample v[36:39], v[37:38], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[40:43], v[40:41], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[44:47], v[44:45], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[48:51], v[48:49], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[52:55], v[52:53], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[56:59], v[56:57], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[60:63], v[60:61], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: image_sample v[64:67], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT: s_waitcnt vmcnt(14)
; GFX11-NEXT: v_dual_add_f32 v0, v8, v4 :: v_dual_add_f32 v1, v9, v5
; GFX11-NEXT: v_dual_add_f32 v4, v10, v6 :: v_dual_add_f32 v5, v11, v7
; GFX11-NEXT: s_waitcnt vmcnt(13)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v12, v0 :: v_dual_add_f32 v1, v13, v1
; GFX11-NEXT: v_dual_add_f32 v4, v14, v4 :: v_dual_add_f32 v5, v15, v5
; GFX11-NEXT: s_waitcnt vmcnt(12)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v16, v0 :: v_dual_add_f32 v1, v17, v1
; GFX11-NEXT: v_dual_add_f32 v4, v18, v4 :: v_dual_add_f32 v5, v19, v5
; GFX11-NEXT: s_waitcnt vmcnt(11)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v20, v0 :: v_dual_add_f32 v1, v21, v1
; GFX11-NEXT: v_dual_add_f32 v4, v22, v4 :: v_dual_add_f32 v5, v23, v5
; GFX11-NEXT: s_waitcnt vmcnt(10)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v24, v0 :: v_dual_add_f32 v1, v25, v1
; GFX11-NEXT: v_dual_add_f32 v4, v26, v4 :: v_dual_add_f32 v5, v27, v5
; GFX11-NEXT: s_waitcnt vmcnt(9)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v28, v0 :: v_dual_add_f32 v1, v29, v1
; GFX11-NEXT: v_dual_add_f32 v4, v30, v4 :: v_dual_add_f32 v5, v31, v5
; GFX11-NEXT: s_waitcnt vmcnt(8)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v32, v0 :: v_dual_add_f32 v1, v33, v1
; GFX11-NEXT: v_dual_add_f32 v4, v34, v4 :: v_dual_add_f32 v5, v35, v5
; GFX11-NEXT: s_waitcnt vmcnt(7)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v36, v0 :: v_dual_add_f32 v1, v37, v1
; GFX11-NEXT: v_dual_add_f32 v4, v38, v4 :: v_dual_add_f32 v5, v39, v5
; GFX11-NEXT: s_waitcnt vmcnt(6)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v40, v0 :: v_dual_add_f32 v1, v41, v1
; GFX11-NEXT: v_dual_add_f32 v4, v42, v4 :: v_dual_add_f32 v5, v43, v5
; GFX11-NEXT: s_waitcnt vmcnt(5)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v44, v0 :: v_dual_add_f32 v1, v45, v1
; GFX11-NEXT: v_dual_add_f32 v4, v46, v4 :: v_dual_add_f32 v5, v47, v5
; GFX11-NEXT: s_waitcnt vmcnt(4)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v48, v0 :: v_dual_add_f32 v1, v49, v1
; GFX11-NEXT: v_dual_add_f32 v4, v50, v4 :: v_dual_add_f32 v5, v51, v5
; GFX11-NEXT: s_waitcnt vmcnt(3)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v52, v0 :: v_dual_add_f32 v1, v53, v1
; GFX11-NEXT: v_dual_add_f32 v4, v54, v4 :: v_dual_add_f32 v5, v55, v5
; GFX11-NEXT: s_waitcnt vmcnt(2)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v56, v0 :: v_dual_add_f32 v1, v57, v1
; GFX11-NEXT: v_dual_add_f32 v4, v58, v4 :: v_dual_add_f32 v5, v59, v5
; GFX11-NEXT: s_waitcnt vmcnt(1)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v60, v0 :: v_dual_add_f32 v1, v61, v1
; GFX11-NEXT: v_dual_add_f32 v4, v62, v4 :: v_dual_add_f32 v5, v63, v5
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_add_f32 v0, v64, v0 :: v_dual_add_f32 v1, v65, v1
; GFX11-NEXT: v_dual_add_f32 v4, v66, v4 :: v_dual_add_f32 v5, v67, v5
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e32 v0, v0, v1
; GFX11-NEXT: v_cvt_pk_rtz_f16_f32_e32 v1, v4, v5
; GFX11-NEXT: exp mrt0 v0, v1, off, off done
; GFX11-NEXT: s_endpgm
; The IR below repeats one pattern sixteen times: load a 2-element coordinate
; offset from the buffer descriptor %i5 (at offsets 16, 32, ..., 256), add the
; interpolated center coordinates (%i19, %i16), sample the 2D image, and
; accumulate the four result channels; the sums are packed with cvt.pkrtz and
; exported to mrt0.
.entry:
  %i = call i64 @llvm.amdgcn.s.getpc()
  %i1 = and i64 %i, -4294967296
  %i2 = zext i32 %userdata6 to i64
  %i3 = or disjoint i64 %i1, %i2
  %i4 = inttoptr i64 %i3 to ptr addrspace(4)
  %i5 = load <4 x i32>, ptr addrspace(4) %i4, align 16
  %i6 = zext i32 %userdata7 to i64
  %i7 = or disjoint i64 %i1, %i6
  %i8 = inttoptr i64 %i7 to ptr addrspace(4)
  %i9 = load <4 x i32>, ptr addrspace(4) %i8, align 4, !invariant.load !0
  %i10 = zext i32 %userdata8 to i64
  %i11 = or disjoint i64 %i1, %i10
  %i12 = inttoptr i64 %i11 to ptr addrspace(4)
  %i13 = load <8 x i32>, ptr addrspace(4) %i12, align 4, !invariant.load !0
  %i14 = call float @llvm.amdgcn.lds.param.load(i32 1, i32 0, i32 %PrimMask)
  %PerspInterpCenter.i1 = extractelement <2 x float> %PerspInterpCenter, i64 1
  %PerspInterpCenter.i0 = extractelement <2 x float> %PerspInterpCenter, i64 0
  %i15 = call float @llvm.amdgcn.interp.inreg.p10(float %i14, float %PerspInterpCenter.i0, float %i14)
  %i16 = call float @llvm.amdgcn.interp.inreg.p2(float %i14, float %PerspInterpCenter.i1, float %i15)
  %i17 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %PrimMask)
  %i18 = call float @llvm.amdgcn.interp.inreg.p10(float %i17, float %PerspInterpCenter.i0, float %i17)
  %i19 = call float @llvm.amdgcn.interp.inreg.p2(float %i17, float %PerspInterpCenter.i1, float %i18)
  %i20 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 16, i32 0), !invariant.load !0
  %i21 = shufflevector <2 x i32> %i20, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i22 = bitcast <4 x i32> %i21 to <4 x float>
  %.i0 = extractelement <4 x float> %i22, i64 0
  %.i1 = extractelement <4 x float> %i22, i64 1
  %.i03 = fadd reassoc nnan nsz arcp contract afn float %.i0, %i19
  %.i14 = fadd reassoc nnan nsz arcp contract afn float %.i1, %i16
  %i23 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i03, float %.i14, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i010 = extractelement <4 x float> %i23, i64 0
  %.i113 = extractelement <4 x float> %i23, i64 1
  %.i215 = extractelement <4 x float> %i23, i64 2
  %.i317 = extractelement <4 x float> %i23, i64 3
  %i24 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 32, i32 0), !invariant.load !0
  %i25 = shufflevector <2 x i32> %i24, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i26 = bitcast <4 x i32> %i25 to <4 x float>
  %.i05 = extractelement <4 x float> %i26, i64 0
  %.i16 = extractelement <4 x float> %i26, i64 1
  %.i07 = fadd reassoc nnan nsz arcp contract afn float %.i05, %i19
  %.i18 = fadd reassoc nnan nsz arcp contract afn float %.i16, %i16
  %i27 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i07, float %.i18, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i09 = extractelement <4 x float> %i27, i64 0
  %.i011 = fadd reassoc nnan nsz arcp contract afn float %.i09, %.i010
  %.i112 = extractelement <4 x float> %i27, i64 1
  %.i114 = fadd reassoc nnan nsz arcp contract afn float %.i112, %.i113
  %.i2 = extractelement <4 x float> %i27, i64 2
  %.i216 = fadd reassoc nnan nsz arcp contract afn float %.i2, %.i215
  %.i3 = extractelement <4 x float> %i27, i64 3
  %.i318 = fadd reassoc nnan nsz arcp contract afn float %.i3, %.i317
  %i28 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 48, i32 0), !invariant.load !0
  %i29 = shufflevector <2 x i32> %i28, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i30 = bitcast <4 x i32> %i29 to <4 x float>
  %.i019 = extractelement <4 x float> %i30, i64 0
  %.i120 = extractelement <4 x float> %i30, i64 1
  %.i021 = fadd reassoc nnan nsz arcp contract afn float %.i019, %i19
  %.i122 = fadd reassoc nnan nsz arcp contract afn float %.i120, %i16
  %i31 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i021, float %.i122, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i023 = extractelement <4 x float> %i31, i64 0
  %.i024 = fadd reassoc nnan nsz arcp contract afn float %.i023, %.i011
  %.i125 = extractelement <4 x float> %i31, i64 1
  %.i126 = fadd reassoc nnan nsz arcp contract afn float %.i125, %.i114
  %.i227 = extractelement <4 x float> %i31, i64 2
  %.i228 = fadd reassoc nnan nsz arcp contract afn float %.i227, %.i216
  %.i329 = extractelement <4 x float> %i31, i64 3
  %.i330 = fadd reassoc nnan nsz arcp contract afn float %.i329, %.i318
  %i32 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 64, i32 0), !invariant.load !0
  %i33 = shufflevector <2 x i32> %i32, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i34 = bitcast <4 x i32> %i33 to <4 x float>
  %.i031 = extractelement <4 x float> %i34, i64 0
  %.i132 = extractelement <4 x float> %i34, i64 1
  %.i033 = fadd reassoc nnan nsz arcp contract afn float %.i031, %i19
  %.i134 = fadd reassoc nnan nsz arcp contract afn float %.i132, %i16
  %i35 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i033, float %.i134, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i035 = extractelement <4 x float> %i35, i64 0
  %.i036 = fadd reassoc nnan nsz arcp contract afn float %.i035, %.i024
  %.i137 = extractelement <4 x float> %i35, i64 1
  %.i138 = fadd reassoc nnan nsz arcp contract afn float %.i137, %.i126
  %.i239 = extractelement <4 x float> %i35, i64 2
  %.i240 = fadd reassoc nnan nsz arcp contract afn float %.i239, %.i228
  %.i341 = extractelement <4 x float> %i35, i64 3
  %.i342 = fadd reassoc nnan nsz arcp contract afn float %.i341, %.i330
  %i36 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 80, i32 0), !invariant.load !0
  %i37 = shufflevector <2 x i32> %i36, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i38 = bitcast <4 x i32> %i37 to <4 x float>
  %.i043 = extractelement <4 x float> %i38, i64 0
  %.i144 = extractelement <4 x float> %i38, i64 1
  %.i045 = fadd reassoc nnan nsz arcp contract afn float %.i043, %i19
  %.i146 = fadd reassoc nnan nsz arcp contract afn float %.i144, %i16
  %i39 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i045, float %.i146, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i047 = extractelement <4 x float> %i39, i64 0
  %.i048 = fadd reassoc nnan nsz arcp contract afn float %.i047, %.i036
  %.i149 = extractelement <4 x float> %i39, i64 1
  %.i150 = fadd reassoc nnan nsz arcp contract afn float %.i149, %.i138
  %.i251 = extractelement <4 x float> %i39, i64 2
  %.i252 = fadd reassoc nnan nsz arcp contract afn float %.i251, %.i240
  %.i353 = extractelement <4 x float> %i39, i64 3
  %.i354 = fadd reassoc nnan nsz arcp contract afn float %.i353, %.i342
  %i40 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 96, i32 0), !invariant.load !0
  %i41 = shufflevector <2 x i32> %i40, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i42 = bitcast <4 x i32> %i41 to <4 x float>
  %.i055 = extractelement <4 x float> %i42, i64 0
  %.i156 = extractelement <4 x float> %i42, i64 1
  %.i057 = fadd reassoc nnan nsz arcp contract afn float %.i055, %i19
  %.i158 = fadd reassoc nnan nsz arcp contract afn float %.i156, %i16
  %i43 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i057, float %.i158, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i059 = extractelement <4 x float> %i43, i64 0
  %.i060 = fadd reassoc nnan nsz arcp contract afn float %.i059, %.i048
  %.i161 = extractelement <4 x float> %i43, i64 1
  %.i162 = fadd reassoc nnan nsz arcp contract afn float %.i161, %.i150
  %.i263 = extractelement <4 x float> %i43, i64 2
  %.i264 = fadd reassoc nnan nsz arcp contract afn float %.i263, %.i252
  %.i365 = extractelement <4 x float> %i43, i64 3
  %.i366 = fadd reassoc nnan nsz arcp contract afn float %.i365, %.i354
  %i44 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 112, i32 0), !invariant.load !0
  %i45 = shufflevector <2 x i32> %i44, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i46 = bitcast <4 x i32> %i45 to <4 x float>
  %.i067 = extractelement <4 x float> %i46, i64 0
  %.i168 = extractelement <4 x float> %i46, i64 1
  %.i069 = fadd reassoc nnan nsz arcp contract afn float %.i067, %i19
  %.i170 = fadd reassoc nnan nsz arcp contract afn float %.i168, %i16
  %i47 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i069, float %.i170, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i071 = extractelement <4 x float> %i47, i64 0
  %.i072 = fadd reassoc nnan nsz arcp contract afn float %.i071, %.i060
  %.i173 = extractelement <4 x float> %i47, i64 1
  %.i174 = fadd reassoc nnan nsz arcp contract afn float %.i173, %.i162
  %.i275 = extractelement <4 x float> %i47, i64 2
  %.i276 = fadd reassoc nnan nsz arcp contract afn float %.i275, %.i264
  %.i377 = extractelement <4 x float> %i47, i64 3
  %.i378 = fadd reassoc nnan nsz arcp contract afn float %.i377, %.i366
  %i48 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 128, i32 0), !invariant.load !0
  %i49 = shufflevector <2 x i32> %i48, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i50 = bitcast <4 x i32> %i49 to <4 x float>
  %.i079 = extractelement <4 x float> %i50, i64 0
  %.i180 = extractelement <4 x float> %i50, i64 1
  %.i081 = fadd reassoc nnan nsz arcp contract afn float %.i079, %i19
  %.i182 = fadd reassoc nnan nsz arcp contract afn float %.i180, %i16
  %i51 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i081, float %.i182, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i083 = extractelement <4 x float> %i51, i64 0
  %.i084 = fadd reassoc nnan nsz arcp contract afn float %.i083, %.i072
  %.i185 = extractelement <4 x float> %i51, i64 1
  %.i186 = fadd reassoc nnan nsz arcp contract afn float %.i185, %.i174
  %.i287 = extractelement <4 x float> %i51, i64 2
  %.i288 = fadd reassoc nnan nsz arcp contract afn float %.i287, %.i276
  %.i389 = extractelement <4 x float> %i51, i64 3
  %.i390 = fadd reassoc nnan nsz arcp contract afn float %.i389, %.i378
  %i52 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 144, i32 0), !invariant.load !0
  %i53 = shufflevector <2 x i32> %i52, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i54 = bitcast <4 x i32> %i53 to <4 x float>
  %.i091 = extractelement <4 x float> %i54, i64 0
  %.i192 = extractelement <4 x float> %i54, i64 1
  %.i093 = fadd reassoc nnan nsz arcp contract afn float %.i091, %i19
  %.i194 = fadd reassoc nnan nsz arcp contract afn float %.i192, %i16
  %i55 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i093, float %.i194, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i095 = extractelement <4 x float> %i55, i64 0
  %.i096 = fadd reassoc nnan nsz arcp contract afn float %.i095, %.i084
  %.i197 = extractelement <4 x float> %i55, i64 1
  %.i198 = fadd reassoc nnan nsz arcp contract afn float %.i197, %.i186
  %.i299 = extractelement <4 x float> %i55, i64 2
  %.i2100 = fadd reassoc nnan nsz arcp contract afn float %.i299, %.i288
  %.i3101 = extractelement <4 x float> %i55, i64 3
  %.i3102 = fadd reassoc nnan nsz arcp contract afn float %.i3101, %.i390
  %i56 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 160, i32 0), !invariant.load !0
  %i57 = shufflevector <2 x i32> %i56, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i58 = bitcast <4 x i32> %i57 to <4 x float>
  %.i0103 = extractelement <4 x float> %i58, i64 0
  %.i1104 = extractelement <4 x float> %i58, i64 1
  %.i0105 = fadd reassoc nnan nsz arcp contract afn float %.i0103, %i19
  %.i1106 = fadd reassoc nnan nsz arcp contract afn float %.i1104, %i16
  %i59 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0105, float %.i1106, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i0107 = extractelement <4 x float> %i59, i64 0
  %.i0108 = fadd reassoc nnan nsz arcp contract afn float %.i0107, %.i096
  %.i1109 = extractelement <4 x float> %i59, i64 1
  %.i1110 = fadd reassoc nnan nsz arcp contract afn float %.i1109, %.i198
  %.i2111 = extractelement <4 x float> %i59, i64 2
  %.i2112 = fadd reassoc nnan nsz arcp contract afn float %.i2111, %.i2100
  %.i3113 = extractelement <4 x float> %i59, i64 3
  %.i3114 = fadd reassoc nnan nsz arcp contract afn float %.i3113, %.i3102
  %i60 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 176, i32 0), !invariant.load !0
  %i61 = shufflevector <2 x i32> %i60, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i62 = bitcast <4 x i32> %i61 to <4 x float>
  %.i0115 = extractelement <4 x float> %i62, i64 0
  %.i1116 = extractelement <4 x float> %i62, i64 1
  %.i0117 = fadd reassoc nnan nsz arcp contract afn float %.i0115, %i19
  %.i1118 = fadd reassoc nnan nsz arcp contract afn float %.i1116, %i16
  %i63 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0117, float %.i1118, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i0119 = extractelement <4 x float> %i63, i64 0
  %.i0120 = fadd reassoc nnan nsz arcp contract afn float %.i0119, %.i0108
  %.i1121 = extractelement <4 x float> %i63, i64 1
  %.i1122 = fadd reassoc nnan nsz arcp contract afn float %.i1121, %.i1110
  %.i2123 = extractelement <4 x float> %i63, i64 2
  %.i2124 = fadd reassoc nnan nsz arcp contract afn float %.i2123, %.i2112
  %.i3125 = extractelement <4 x float> %i63, i64 3
  %.i3126 = fadd reassoc nnan nsz arcp contract afn float %.i3125, %.i3114
  %i64 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 192, i32 0), !invariant.load !0
  %i65 = shufflevector <2 x i32> %i64, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i66 = bitcast <4 x i32> %i65 to <4 x float>
  %.i0127 = extractelement <4 x float> %i66, i64 0
  %.i1128 = extractelement <4 x float> %i66, i64 1
  %.i0129 = fadd reassoc nnan nsz arcp contract afn float %.i0127, %i19
  %.i1130 = fadd reassoc nnan nsz arcp contract afn float %.i1128, %i16
  %i67 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0129, float %.i1130, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i0131 = extractelement <4 x float> %i67, i64 0
  %.i0132 = fadd reassoc nnan nsz arcp contract afn float %.i0131, %.i0120
  %.i1133 = extractelement <4 x float> %i67, i64 1
  %.i1134 = fadd reassoc nnan nsz arcp contract afn float %.i1133, %.i1122
  %.i2135 = extractelement <4 x float> %i67, i64 2
  %.i2136 = fadd reassoc nnan nsz arcp contract afn float %.i2135, %.i2124
  %.i3137 = extractelement <4 x float> %i67, i64 3
  %.i3138 = fadd reassoc nnan nsz arcp contract afn float %.i3137, %.i3126
  %i68 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 208, i32 0), !invariant.load !0
  %i69 = shufflevector <2 x i32> %i68, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i70 = bitcast <4 x i32> %i69 to <4 x float>
  %.i0139 = extractelement <4 x float> %i70, i64 0
  %.i1140 = extractelement <4 x float> %i70, i64 1
  %.i0141 = fadd reassoc nnan nsz arcp contract afn float %.i0139, %i19
  %.i1142 = fadd reassoc nnan nsz arcp contract afn float %.i1140, %i16
  %i71 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0141, float %.i1142, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i0143 = extractelement <4 x float> %i71, i64 0
  %.i0144 = fadd reassoc nnan nsz arcp contract afn float %.i0143, %.i0132
  %.i1145 = extractelement <4 x float> %i71, i64 1
  %.i1146 = fadd reassoc nnan nsz arcp contract afn float %.i1145, %.i1134
  %.i2147 = extractelement <4 x float> %i71, i64 2
  %.i2148 = fadd reassoc nnan nsz arcp contract afn float %.i2147, %.i2136
  %.i3149 = extractelement <4 x float> %i71, i64 3
  %.i3150 = fadd reassoc nnan nsz arcp contract afn float %.i3149, %.i3138
  %i72 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 224, i32 0), !invariant.load !0
  %i73 = shufflevector <2 x i32> %i72, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i74 = bitcast <4 x i32> %i73 to <4 x float>
  %.i0151 = extractelement <4 x float> %i74, i64 0
  %.i1152 = extractelement <4 x float> %i74, i64 1
  %.i0153 = fadd reassoc nnan nsz arcp contract afn float %.i0151, %i19
  %.i1154 = fadd reassoc nnan nsz arcp contract afn float %.i1152, %i16
  %i75 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0153, float %.i1154, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i0155 = extractelement <4 x float> %i75, i64 0
  %.i0156 = fadd reassoc nnan nsz arcp contract afn float %.i0155, %.i0144
  %.i1157 = extractelement <4 x float> %i75, i64 1
  %.i1158 = fadd reassoc nnan nsz arcp contract afn float %.i1157, %.i1146
  %.i2159 = extractelement <4 x float> %i75, i64 2
  %.i2160 = fadd reassoc nnan nsz arcp contract afn float %.i2159, %.i2148
  %.i3161 = extractelement <4 x float> %i75, i64 3
  %.i3162 = fadd reassoc nnan nsz arcp contract afn float %.i3161, %.i3150
  %i76 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 240, i32 0), !invariant.load !0
  %i77 = shufflevector <2 x i32> %i76, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i78 = bitcast <4 x i32> %i77 to <4 x float>
  %.i0163 = extractelement <4 x float> %i78, i64 0
  %.i1164 = extractelement <4 x float> %i78, i64 1
  %.i0165 = fadd reassoc nnan nsz arcp contract afn float %.i0163, %i19
  %.i1166 = fadd reassoc nnan nsz arcp contract afn float %.i1164, %i16
  %i79 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0165, float %.i1166, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i0167 = extractelement <4 x float> %i79, i64 0
  %.i0168 = fadd reassoc nnan nsz arcp contract afn float %.i0167, %.i0156
  %.i1169 = extractelement <4 x float> %i79, i64 1
  %.i1170 = fadd reassoc nnan nsz arcp contract afn float %.i1169, %.i1158
  %.i2171 = extractelement <4 x float> %i79, i64 2
  %.i2172 = fadd reassoc nnan nsz arcp contract afn float %.i2171, %.i2160
  %.i3173 = extractelement <4 x float> %i79, i64 3
  %.i3174 = fadd reassoc nnan nsz arcp contract afn float %.i3173, %.i3162
  %i80 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %i5, i32 256, i32 0), !invariant.load !0
  %i81 = shufflevector <2 x i32> %i80, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i82 = bitcast <4 x i32> %i81 to <4 x float>
  %.i0175 = extractelement <4 x float> %i82, i64 0
  %.i1176 = extractelement <4 x float> %i82, i64 1
  %.i0177 = fadd reassoc nnan nsz arcp contract afn float %.i0175, %i19
  %.i1178 = fadd reassoc nnan nsz arcp contract afn float %.i1176, %i16
  %i83 = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %.i0177, float %.i1178, <8 x i32> %i13, <4 x i32> %i9, i1 false, i32 0, i32 0)
  %.i0179 = extractelement <4 x float> %i83, i64 0
  %.i0180 = fadd reassoc nnan nsz arcp contract afn float %.i0179, %.i0168
  %.i1181 = extractelement <4 x float> %i83, i64 1
  %.i1182 = fadd reassoc nnan nsz arcp contract afn float %.i1181, %.i1170
  %.i2183 = extractelement <4 x float> %i83, i64 2
  %.i2184 = fadd reassoc nnan nsz arcp contract afn float %.i2183, %.i2172
  %.i3185 = extractelement <4 x float> %i83, i64 3
  %.i3186 = fadd reassoc nnan nsz arcp contract afn float %.i3185, %.i3174
  %i84 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %.i0180, float %.i1182)
  %i85 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %.i2184, float %.i3186)
  %i86 = bitcast <2 x half> %i84 to float
  %i87 = bitcast <2 x half> %i85 to float
  call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float %i86, float %i87, float poison, float poison, i1 true, i1 true)
  ret void
}

declare noundef i64 @llvm.amdgcn.s.getpc() #3
; NOTE(review): the call sites above use the fully mangled intrinsic name
; @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32, so this shorter-suffixed
; declaration appears to be a distinct, unused symbol — confirm whether it is
; stale and can be dropped.
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #5
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #3
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #4
declare float @llvm.amdgcn.lds.param.load(i32 immarg, i32 immarg, i32) #3
declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #3
declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #3
declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) #8

; #2 enables the max-memory-clause scheduling strategy under test.
attributes #2 = { alwaysinline nounwind memory(readwrite) "amdgpu-sched-strategy"="max-memory-clause" "amdgpu-max-memory-cluster-dwords"="32"}
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #4 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
attributes #5 = { nocallback nofree nosync nounwind willreturn memory(read) }
attributes #8 = { nocallback nofree nosync nounwind willreturn memory(none) }

!0 = !{}