/* $NetBSD: intel_sseu.c,v 1.3 2021/12/19 11:49:11 riastradh Exp $ */

/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: intel_sseu.c,v 1.3 2021/12/19 11:49:11 riastradh Exp $");

#include "i915_drv.h"
#include "intel_lrc_reg.h"
#include "intel_sseu.h"

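/*
 * Record the maximum slice/subslice/EU counts reported for this device and
 * derive the byte strides used to index the flat subslice and EU mask
 * arrays.
 */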
void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;

	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}

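/*
 * Return the total number of enabled subslices across all slices, i.e. the
 * population count of the entire subslice mask.
 */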
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
		total += hweight8(sseu->subslice_mask[i]);

	return total;
}

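/*
 * Assemble the subslice mask of the given slice from the flat byte array
 * into a single 32-bit value.
 */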
u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	int i, offset = slice * sseu->ss_stride;
	u32 mask = 0;

	GEM_BUG_ON(slice >= sseu->max_slices);

	for (i = 0; i < sseu->ss_stride; i++)
		mask |= (u32)sseu->subslice_mask[offset + i] <<
			i * BITS_PER_BYTE;

	return mask;
}

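/*
 * Store a 32-bit subslice mask for the given slice back into the flat byte
 * array, copying only the ss_stride bytes that are valid for this device.
 */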
void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
			      u32 ss_mask)
{
	int offset = slice * sseu->ss_stride;

	memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride);
}

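/*
 * Return the number of enabled subslices in the given slice.
 */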
unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
	return hweight32(intel_sseu_get_subslices(sseu, slice));
}

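/*
 * Compute the value to program into the R_PWR_CLK_STATE register for a
 * context, encoding the requested slice/subslice/EU powergating
 * configuration subject to the generation-specific restrictions explained
 * below.
 */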
u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
			 const struct intel_sseu *req_sseu)
{
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	struct intel_sseu ctx_sseu;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (INTEL_GEN(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system.
	 *
	 * We could choose full enablement, but on ICL we know there are use
	 * cases which disable slices for functional reasons, not just for
	 * performance.  So in this case we select a known stable subset.
	 */
	if (!i915->perf.exclusive_stream) {
		ctx_sseu = *req_sseu;
	} else {
		ctx_sseu = intel_sseu_from_device_info(sseu);

		if (IS_GEN(i915, 11)) {
			/*
			 * We only need the subslice count, so it doesn't
			 * matter which ones we select - just keep the low
			 * bits set, in the amount of half of all available
			 * subslices per slice.
			 */
			ctx_sseu.subslice_mask =
				~(~0u << (hweight8(ctx_sseu.subslice_mask) / 2));
			ctx_sseu.slice_mask = 0x1;
		}
	}

	slices = hweight8(ctx_sseu.slice_mask);
	subslices = hweight8(ctx_sseu.subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three
	 * bits wide and Icelake has up to eight subslices, special
	 * programming is needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits wide, the maximum
	 * documented value for it is four.  From this some rules/restrictions
	 * follow:
	 *
	 * 1.
	 * If the enabled subslice count is greater than four, two whole
	 * slices must be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
	 *
	 * From these restrictions it follows that it is not possible to
	 * enable a subslice count between the SScount maximum of four and the
	 * maximum number available on a particular SKU.  Either all subslices
	 * are enabled, or a count between one and four on the first slice.
	 */
	if (IS_GEN(i915, 11) &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (INTEL_GEN(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

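	/*
	 * Likewise request an explicit subslice count when subslice
	 * powergating is in use.
	 */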
	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

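	/*
	 * Finally, encode the minimum and maximum number of EUs to enable
	 * per subslice when EU powergating is supported.
	 */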
	if (sseu->has_eu_pg) {
		u32 val;

		val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}