// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014 Intel Corporation
 */

#include "gen8_engine_cs.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_lrc.h"
#include "intel_ring.h"

int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	bool vf_flush_wa = false, dc_flush_wa = false;
	u32 *cs, flags = 0;
	int len;

	flags |= PIPE_CONTROL_CS_STALL;

	if (mode & EMIT_FLUSH) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
	}

	if (mode & EMIT_INVALIDATE) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		/*
		 * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
		 * pipe control.
		 */
		if (GRAPHICS_VER(rq->i915) == 9)
			vf_flush_wa = true;

		/* WaForGAMHang:kbl */
		if (IS_KABYLAKE(rq->i915) && IS_GRAPHICS_STEP(rq->i915, 0, STEP_C0))
			dc_flush_wa = true;
	}

	len = 6;

	if (vf_flush_wa)
		len += 6;

	if (dc_flush_wa)
		len += 12;

	cs = intel_ring_begin(rq, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (vf_flush_wa)
		cs = gen8_emit_pipe_control(cs, 0, 0);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
					    0);

	cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

	if (dc_flush_wa)
		cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);

	intel_ring_advance(rq, cs);

	return 0;
}

int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_FLUSH_DW + 1;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;
	}

	*cs++ = cmd;
	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */
	intel_ring_advance(rq, cs);

	return 0;
}

int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	if (mode & EMIT_FLUSH) {
		u32 *cs;
		u32 flags = 0;

		flags |= PIPE_CONTROL_CS_STALL;

		flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	if (mode & EMIT_INVALIDATE) {
		u32 *cs;
		u32 flags = 0;

		flags |= PIPE_CONTROL_CS_STALL;

		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_STORE_DATA_INDEX;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	return 0;
}

static u32 preparser_disable(bool state)
{
	return MI_ARB_CHECK | 1 << 8 | state;
}

static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine)
{
	switch (engine->id) {
	case RCS0:
		return GEN12_CCS_AUX_INV;
	case BCS0:
		return GEN12_BCS0_AUX_INV;
	case VCS0:
		return GEN12_VD0_AUX_INV;
	case VCS2:
		return GEN12_VD2_AUX_INV;
	case VECS0:
		return GEN12_VE0_AUX_INV;
	case CCS0:
		return GEN12_CCS0_AUX_INV;
	default:
		return INVALID_MMIO_REG;
	}
}

static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
{
	i915_reg_t reg = gen12_get_aux_inv_reg(engine);

	if (IS_PONTEVECCHIO(engine->i915))
		return false;

	/*
	 * So far platforms supported by i915 having flat ccs do not require
	 * AUX invalidation. Check also whether the engine requires it.
	 */
	return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915);
}

u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
{
	i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine);
	u32 gsi_offset = engine->gt->uncore->gsi_offset;

	if (!gen12_needs_ccs_aux_inv(engine))
		return cs;

	*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
	*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
	*cs++ = AUX_INV;

	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
		MI_SEMAPHORE_REGISTER_POLL |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
	*cs++ = 0;
	*cs++ = 0;

	return cs;
}

static int mtl_dummy_pipe_control(struct i915_request *rq)
{
	/* Wa_14016712196 */
	if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
	    IS_DG2(rq->i915)) {
		u32 *cs;

		/* dummy PIPE_CONTROL + depth flush */
		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);
		cs = gen12_emit_pipe_control(cs,
					     0,
					     PIPE_CONTROL_DEPTH_CACHE_FLUSH,
					     LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	return 0;
}

int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	struct intel_engine_cs *engine = rq->engine;

	/*
	 * On Aux CCS platforms the invalidation of the Aux
	 * table requires quiescing memory traffic beforehand
	 */
	if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
		u32 bit_group_0 = 0;
		u32 bit_group_1 = 0;
		int err;
		u32 *cs;

		err = mtl_dummy_pipe_control(rq);
		if (err)
			return err;

		bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;

		/*
		 * When required, in MTL and beyond platforms we
		 * need to set the CCS_FLUSH bit in the pipe control
		 */
		if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
			bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;

		/*
		 * L3 fabric flush is needed for AUX CCS invalidation
		 * which happens as part of pipe-control so we can
		 * ignore PIPE_CONTROL_FLUSH_L3. Also PIPE_CONTROL_FLUSH_L3
		 * deals with Protected Memory which is not needed for
		 * AUX CCS invalidation and leads to unwanted side effects.
		 */
		if ((mode & EMIT_FLUSH) &&
		    GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70))
			bit_group_1 |= PIPE_CONTROL_FLUSH_L3;

		bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
		bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/* Wa_1409600907:tgl,adl-p */
		bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
		bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
		bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE;

		bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX;
		bit_group_1 |= PIPE_CONTROL_QW_WRITE;

		bit_group_1 |= PIPE_CONTROL_CS_STALL;

		if (!HAS_3D_PIPELINE(engine->i915))
			bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
		else if (engine->class == COMPUTE_CLASS)
			bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

		cs = intel_ring_begin(rq, 6);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1,
					     LRC_PPHWSP_SCRATCH_ADDR);
		intel_ring_advance(rq, cs);
	}

	if (mode & EMIT_INVALIDATE) {
		u32 flags = 0;
		u32 *cs, count;
		int err;

		err = mtl_dummy_pipe_control(rq);
		if (err)
			return err;

		flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;

		flags |= PIPE_CONTROL_STORE_DATA_INDEX;
		flags |= PIPE_CONTROL_QW_WRITE;

		flags |= PIPE_CONTROL_CS_STALL;

		if (!HAS_3D_PIPELINE(engine->i915))
			flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
		else if (engine->class == COMPUTE_CLASS)
			flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

		count = 8;
		if (gen12_needs_ccs_aux_inv(rq->engine))
			count += 8;

		cs = intel_ring_begin(rq, count);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Prevent the pre-parser from skipping past the TLB
		 * invalidate and loading a stale page for the batch
		 * buffer / request payload.
		 */
		*cs++ = preparser_disable(true);

		cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);

		cs = gen12_emit_aux_table_inv(engine, cs);

		*cs++ = preparser_disable(false);
		intel_ring_advance(rq, cs);
	}

	return 0;
}

int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
	u32 cmd = 4;
	u32 *cs;

	if (mode & EMIT_INVALIDATE) {
		cmd += 2;

		if (gen12_needs_ccs_aux_inv(rq->engine))
			cmd += 8;
	}

	cs = intel_ring_begin(rq, cmd);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (mode & EMIT_INVALIDATE)
		*cs++ = preparser_disable(true);

	cmd = MI_FLUSH_DW + 1;

	/*
	 * We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_INVALIDATE_TLB;
		if (rq->engine->class == VIDEO_DECODE_CLASS)
			cmd |= MI_INVALIDATE_BSD;

		if (gen12_needs_ccs_aux_inv(rq->engine) &&
		    rq->engine->class == COPY_ENGINE_CLASS)
			cmd |= MI_FLUSH_DW_CCS;
	}

	*cs++ = cmd;
	*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
	*cs++ = 0; /* upper addr */
	*cs++ = 0; /* value */

	cs = gen12_emit_aux_table_inv(rq->engine, cs);

	if (mode & EMIT_INVALIDATE)
		*cs++ = preparser_disable(false);

	intel_ring_advance(rq, cs);

	return 0;
}

static u32 preempt_address(struct intel_engine_cs *engine)
{
	return (i915_ggtt_offset(engine->status_page.vma) +
		I915_GEM_HWS_PREEMPT_ADDR);
}

static u32 hwsp_offset(const struct i915_request *rq)
{
	const struct intel_timeline *tl;

	/* Before the request is executed, the timeline is fixed */
	tl = rcu_dereference_protected(rq->timeline,
				       !i915_request_signaled(rq));

	/* See the comment in i915_request_active_seqno(). */
	return page_mask_bits(tl->hwsp_offset) + offset_in_page(rq->hwsp_seqno);
}

int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
	u32 *cs;

	GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
	if (!i915_request_timeline(rq)->has_initial_breadcrumb)
		return 0;

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = hwsp_offset(rq);
	*cs++ = 0;
	*cs++ = rq->fence.seqno - 1;

	/*
	 * Check if we have been preempted before we even get started.
	 *
	 * After this point i915_request_started() reports true, even if
	 * we get preempted and so are no longer running.
	 *
	 * i915_request_started() is used during preemption processing
	 * to decide if the request is currently inside the user payload
	 * or spinning on a kernel semaphore (or earlier). For no-preemption
	 * requests, we do allow preemption on the semaphore before the user
	 * payload, but do not allow preemption once the request is started.
	 *
	 * i915_request_started() is similarly used during GPU hangs to
	 * determine if the user's payload was guilty, and if so, the
	 * request is banned. Before the request is started, it is assumed
	 * to be unharmed and an innocent victim of another's hang.
	 */
	*cs++ = MI_NOOP;
	*cs++ = MI_ARB_CHECK;

	intel_ring_advance(rq, cs);

	/* Record the updated position of the request's payload */
	rq->infix = intel_ring_offset(rq, cs);

	__set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);

	return 0;
}

static int __xehp_emit_bb_start(struct i915_request *rq,
				u64 offset, u32 len,
				const unsigned int flags,
				u32 arb)
{
	struct intel_context *ce = rq->context;
	u32 wa_offset = lrc_indirect_bb(ce);
	u32 *cs;

	GEM_BUG_ON(!ce->wa_bb_page);

	cs = intel_ring_begin(rq, 12);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | arb;

	*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(RING_PREDICATE_RESULT(0));
	*cs++ = wa_offset + DG2_PREDICATE_RESULT_WA;
	*cs++ = 0;

	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	/* Fixup stray MI_SET_PREDICATE as it prevents us executing the ring */
	*cs++ = MI_BATCH_BUFFER_START_GEN8;
	*cs++ = wa_offset + DG2_PREDICATE_RESULT_BB;
	*cs++ = 0;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	intel_ring_advance(rq, cs);

	return 0;
}

int xehp_emit_bb_start_noarb(struct i915_request *rq,
			     u64 offset, u32 len,
			     const unsigned int flags)
{
	return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
}

int xehp_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       const unsigned int flags)
{
	return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
}

int gen8_emit_bb_start_noarb(struct i915_request *rq,
			     u64 offset, u32 len,
			     const unsigned int flags)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/*
	 * WaDisableCtxRestoreArbitration:bdw,chv
	 *
	 * We don't need to perform MI_ARB_ENABLE as often as we do (in
	 * particular all the gen that do not need the w/a at all!), if we
	 * took care to make sure that on every switch into this context
	 * (both ordinary and for preemption) that arbitration was enabled
	 * we would be fine. However, for gen8 there is another w/a that
	 * requires us to not preempt inside GPGPU execution, so we keep
	 * arbitration disabled for gen8 batches. Arbitration will be
	 * re-enabled before we close the request
	 * (engine->emit_fini_breadcrumb).
	 */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	/* FIXME(BDW+): Address space and security selectors. */
	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	intel_ring_advance(rq, cs);

	return 0;
}

int gen8_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       const unsigned int flags)
{
	u32 *cs;

	if (unlikely(i915_request_has_nopreempt(rq)))
		return gen8_emit_bb_start_noarb(rq, offset, len, flags);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	*cs++ = MI_BATCH_BUFFER_START_GEN8 |
		(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
	*cs++ = lower_32_bits(offset);
	*cs++ = upper_32_bits(offset);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	return 0;
}

static void assert_request_valid(struct i915_request *rq)
{
	struct intel_ring *ring __maybe_unused = rq->ring;

	/* Can we unwind this request without appearing to go forwards? */
	GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
}

/*
 * Reserve space for 2 NOOPs at the end of each request to be
 * used as a workaround for not being allowed to do lite
 * restore with HEAD==TAIL (WaIdleLiteRestore).
 */
static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs)
{
	/* Ensure there's always at least one preemption point per-request. */
	*cs++ = MI_ARB_CHECK;
	*cs++ = MI_NOOP;
	rq->wa_tail = intel_ring_offset(rq, cs);

	/* Check that entire request is less than half the ring */
	assert_request_valid(rq);

	return cs;
}

static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = preempt_address(rq->engine);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	return cs;
}

static __always_inline u32*
gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_USER_INTERRUPT;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	if (intel_engine_has_semaphores(rq->engine) &&
	    !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
		cs = emit_preempt_busywait(rq, cs);

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}

static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}

u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}

u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	cs = gen8_emit_pipe_control(cs,
				    PIPE_CONTROL_CS_STALL |
				    PIPE_CONTROL_TLB_INVALIDATE |
				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
				    PIPE_CONTROL_DC_FLUSH_ENABLE,
				    0);

	/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
	cs = gen8_emit_ggtt_write_rcs(cs,
				      rq->fence.seqno,
				      hwsp_offset(rq),
				      PIPE_CONTROL_FLUSH_ENABLE |
				      PIPE_CONTROL_CS_STALL);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	cs = gen8_emit_pipe_control(cs,
				    PIPE_CONTROL_CS_STALL |
				    PIPE_CONTROL_TLB_INVALIDATE |
				    PIPE_CONTROL_TILE_CACHE_FLUSH |
				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
				    PIPE_CONTROL_DEPTH_CACHE_FLUSH |
				    PIPE_CONTROL_DC_FLUSH_ENABLE,
				    0);

	/*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
	cs = gen8_emit_ggtt_write_rcs(cs,
				      rq->fence.seqno,
				      hwsp_offset(rq),
				      PIPE_CONTROL_FLUSH_ENABLE |
				      PIPE_CONTROL_CS_STALL);

	return gen8_emit_fini_breadcrumb_tail(rq, cs);
}

/*
 * Note that the CS instruction pre-parser will not stall on the breadcrumb
 * flush and will continue pre-fetching the instructions after it before the
 * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
 * BB_START/END instructions, so, even though we might pre-fetch the pre-amble
 * of the next request before the memory has been flushed, we're guaranteed that
 * we won't access the batch itself too early.
 * However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
 * so, if the current request is modifying an instruction in the next request on
 * the same intel_context, we might pre-fetch and then execute the pre-update
 * instruction. To avoid this, the users of self-modifying code should either
 * disable the parser around the code emitting the memory writes, via a new flag
 * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
 * the in-kernel use-cases we've opted to use a separate context, see
 * reloc_gpu() as an example.
 * All the above applies only to the instructions themselves. Non-inline data
 * used by the instructions is not pre-fetched.
 */

static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
	*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = preempt_address(rq->engine);
	*cs++ = 0;
	*cs++ = 0;

	return cs;
}

/* Wa_14014475959:dg2 */
#define CCS_SEMAPHORE_PPHWSP_OFFSET	0x540
static u32 ccs_semaphore_offset(struct i915_request *rq)
{
	return i915_ggtt_offset(rq->context->state) +
		(LRC_PPHWSP_PN * PAGE_SIZE) + CCS_SEMAPHORE_PPHWSP_OFFSET;
}

/* Wa_14014475959:dg2 */
static u32 *ccs_emit_wa_busywait(struct i915_request *rq, u32 *cs)
{
	int i;

	*cs++ = MI_ATOMIC_INLINE | MI_ATOMIC_GLOBAL_GTT | MI_ATOMIC_CS_STALL |
		MI_ATOMIC_MOVE;
	*cs++ = ccs_semaphore_offset(rq);
	*cs++ = 0;
	*cs++ = 1;

	/*
	 * When MI_ATOMIC_INLINE_DATA set this command must be 11 DW + (1 NOP)
	 * to align. 4 DWs above + 8 filler DWs here.
	 */
	for (i = 0; i < 8; ++i)
		*cs++ = 0;

	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_EQ_SDD;
	*cs++ = 0;
	*cs++ = ccs_semaphore_offset(rq);
	*cs++ = 0;

	return cs;
}

static __always_inline u32*
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
	*cs++ = MI_USER_INTERRUPT;

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	if (intel_engine_has_semaphores(rq->engine) &&
	    !intel_uc_uses_guc_submission(&rq->engine->gt->uc))
		cs = gen12_emit_preempt_busywait(rq, cs);

	/* Wa_14014475959:dg2 */
	if (intel_engine_uses_wa_hold_ccs_switchout(rq->engine))
		cs = ccs_emit_wa_busywait(rq, cs);

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return gen8_emit_wa_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	/* XXX Stalling flush before seqno write; post-sync not */
	cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}

u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
	struct drm_i915_private *i915 = rq->i915;
	struct intel_gt *gt = rq->engine->gt;
	u32 flags = (PIPE_CONTROL_CS_STALL |
		     PIPE_CONTROL_TLB_INVALIDATE |
		     PIPE_CONTROL_TILE_CACHE_FLUSH |
		     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
		     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
		     PIPE_CONTROL_DC_FLUSH_ENABLE |
		     PIPE_CONTROL_FLUSH_ENABLE);

	if (GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70))
		flags |= PIPE_CONTROL_FLUSH_L3;

	/* Wa_14016712196 */
	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
		/* dummy PIPE_CONTROL + depth flush */
		cs = gen12_emit_pipe_control(cs, 0,
					     PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);

	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
		/* Wa_1409600907 */
		flags |= PIPE_CONTROL_DEPTH_STALL;

	if (!HAS_3D_PIPELINE(rq->i915))
		flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
	else if (rq->engine->class == COMPUTE_CLASS)
		flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;

	cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);

	/*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
	cs = gen12_emit_ggtt_write_rcs(cs,
				       rq->fence.seqno,
				       hwsp_offset(rq),
				       0,
				       PIPE_CONTROL_FLUSH_ENABLE |
				       PIPE_CONTROL_CS_STALL);

	return gen12_emit_fini_breadcrumb_tail(rq, cs);
}