1a85cb24fSFrançois Tigeot /*
2a85cb24fSFrançois Tigeot * Copyright © 2015-2016 Intel Corporation
3a85cb24fSFrançois Tigeot *
4a85cb24fSFrançois Tigeot * Permission is hereby granted, free of charge, to any person obtaining a
5a85cb24fSFrançois Tigeot * copy of this software and associated documentation files (the "Software"),
6a85cb24fSFrançois Tigeot * to deal in the Software without restriction, including without limitation
7a85cb24fSFrançois Tigeot * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8a85cb24fSFrançois Tigeot * and/or sell copies of the Software, and to permit persons to whom the
9a85cb24fSFrançois Tigeot * Software is furnished to do so, subject to the following conditions:
10a85cb24fSFrançois Tigeot *
11a85cb24fSFrançois Tigeot * The above copyright notice and this permission notice (including the next
12a85cb24fSFrançois Tigeot * paragraph) shall be included in all copies or substantial portions of the
13a85cb24fSFrançois Tigeot * Software.
14a85cb24fSFrançois Tigeot *
15a85cb24fSFrançois Tigeot * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16a85cb24fSFrançois Tigeot * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17a85cb24fSFrançois Tigeot * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18a85cb24fSFrançois Tigeot * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19a85cb24fSFrançois Tigeot * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20a85cb24fSFrançois Tigeot * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21a85cb24fSFrançois Tigeot * IN THE SOFTWARE.
22a85cb24fSFrançois Tigeot *
23a85cb24fSFrançois Tigeot * Authors:
24a85cb24fSFrançois Tigeot * Robert Bragg <robert@sixbynine.org>
25a85cb24fSFrançois Tigeot */
26a85cb24fSFrançois Tigeot
27a85cb24fSFrançois Tigeot
28a85cb24fSFrançois Tigeot /**
29a85cb24fSFrançois Tigeot * DOC: i915 Perf Overview
30a85cb24fSFrançois Tigeot *
31a85cb24fSFrançois Tigeot * Gen graphics supports a large number of performance counters that can help
32a85cb24fSFrançois Tigeot * driver and application developers understand and optimize their use of the
33a85cb24fSFrançois Tigeot * GPU.
34a85cb24fSFrançois Tigeot *
35a85cb24fSFrançois Tigeot * This i915 perf interface enables userspace to configure and open a file
36a85cb24fSFrançois Tigeot * descriptor representing a stream of GPU metrics which can then be read() as
37a85cb24fSFrançois Tigeot * a stream of sample records.
38a85cb24fSFrançois Tigeot *
39a85cb24fSFrançois Tigeot * The interface is particularly suited to exposing buffered metrics that are
40a85cb24fSFrançois Tigeot * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
41a85cb24fSFrançois Tigeot *
 * Streams representing a single context are accessible to applications with a
 * corresponding drm file descriptor, such that OpenGL can use the interface
 * without special privileges. Access to system-wide metrics requires root
 * privileges by default, unless changed via the dev.i915.perf_stream_paranoid
 * sysctl option.
47a85cb24fSFrançois Tigeot *
48a85cb24fSFrançois Tigeot */
49a85cb24fSFrançois Tigeot
50a85cb24fSFrançois Tigeot /**
51a85cb24fSFrançois Tigeot * DOC: i915 Perf History and Comparison with Core Perf
52a85cb24fSFrançois Tigeot *
53a85cb24fSFrançois Tigeot * The interface was initially inspired by the core Perf infrastructure but
54a85cb24fSFrançois Tigeot * some notable differences are:
55a85cb24fSFrançois Tigeot *
56a85cb24fSFrançois Tigeot * i915 perf file descriptors represent a "stream" instead of an "event"; where
57a85cb24fSFrançois Tigeot * a perf event primarily corresponds to a single 64bit value, while a stream
58a85cb24fSFrançois Tigeot * might sample sets of tightly-coupled counters, depending on the
59a85cb24fSFrançois Tigeot * configuration. For example the Gen OA unit isn't designed to support
60a85cb24fSFrançois Tigeot * orthogonal configurations of individual counters; it's configured for a set
61a85cb24fSFrançois Tigeot * of related counters. Samples for an i915 perf stream capturing OA metrics
62a85cb24fSFrançois Tigeot * will include a set of counter values packed in a compact HW specific format.
63a85cb24fSFrançois Tigeot * The OA unit supports a number of different packing formats which can be
64a85cb24fSFrançois Tigeot * selected by the user opening the stream. Perf has support for grouping
65a85cb24fSFrançois Tigeot * events, but each event in the group is configured, validated and
66a85cb24fSFrançois Tigeot * authenticated individually with separate system calls.
67a85cb24fSFrançois Tigeot *
68a85cb24fSFrançois Tigeot * i915 perf stream configurations are provided as an array of u64 (key,value)
69a85cb24fSFrançois Tigeot * pairs, instead of a fixed struct with multiple miscellaneous config members,
70a85cb24fSFrançois Tigeot * interleaved with event-type specific members.
71a85cb24fSFrançois Tigeot *
72a85cb24fSFrançois Tigeot * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
73a85cb24fSFrançois Tigeot * The supported metrics are being written to memory by the GPU unsynchronized
74a85cb24fSFrançois Tigeot * with the CPU, using HW specific packing formats for counter sets. Sometimes
75a85cb24fSFrançois Tigeot * the constraints on HW configuration require reports to be filtered before it
76a85cb24fSFrançois Tigeot * would be acceptable to expose them to unprivileged applications - to hide
77a85cb24fSFrançois Tigeot * the metrics of other processes/contexts. For these use cases a read() based
78a85cb24fSFrançois Tigeot * interface is a good fit, and provides an opportunity to filter data as it
79a85cb24fSFrançois Tigeot * gets copied from the GPU mapped buffers to userspace buffers.
80a85cb24fSFrançois Tigeot *
81a85cb24fSFrançois Tigeot *
82a85cb24fSFrançois Tigeot * Issues hit with first prototype based on Core Perf
83a85cb24fSFrançois Tigeot * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
84a85cb24fSFrançois Tigeot *
85a85cb24fSFrançois Tigeot * The first prototype of this driver was based on the core perf
86a85cb24fSFrançois Tigeot * infrastructure, and while we did make that mostly work, with some changes to
87a85cb24fSFrançois Tigeot * perf, we found we were breaking or working around too many assumptions baked
88a85cb24fSFrançois Tigeot * into perf's currently cpu centric design.
89a85cb24fSFrançois Tigeot *
90a85cb24fSFrançois Tigeot * In the end we didn't see a clear benefit to making perf's implementation and
91a85cb24fSFrançois Tigeot * interface more complex by changing design assumptions while we knew we still
92a85cb24fSFrançois Tigeot * wouldn't be able to use any existing perf based userspace tools.
93a85cb24fSFrançois Tigeot *
94a85cb24fSFrançois Tigeot * Also considering the Gen specific nature of the Observability hardware and
95a85cb24fSFrançois Tigeot * how userspace will sometimes need to combine i915 perf OA metrics with
96a85cb24fSFrançois Tigeot * side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're
97a85cb24fSFrançois Tigeot * expecting the interface to be used by a platform specific userspace such as
98a85cb24fSFrançois Tigeot * OpenGL or tools. This is to say; we aren't inherently missing out on having
99a85cb24fSFrançois Tigeot * a standard vendor/architecture agnostic interface by not using perf.
100a85cb24fSFrançois Tigeot *
101a85cb24fSFrançois Tigeot *
102a85cb24fSFrançois Tigeot * For posterity, in case we might re-visit trying to adapt core perf to be
103a85cb24fSFrançois Tigeot * better suited to exposing i915 metrics these were the main pain points we
104a85cb24fSFrançois Tigeot * hit:
105a85cb24fSFrançois Tigeot *
106a85cb24fSFrançois Tigeot * - The perf based OA PMU driver broke some significant design assumptions:
107a85cb24fSFrançois Tigeot *
108a85cb24fSFrançois Tigeot * Existing perf pmus are used for profiling work on a cpu and we were
109a85cb24fSFrançois Tigeot * introducing the idea of _IS_DEVICE pmus with different security
110a85cb24fSFrançois Tigeot * implications, the need to fake cpu-related data (such as user/kernel
111a85cb24fSFrançois Tigeot * registers) to fit with perf's current design, and adding _DEVICE records
112a85cb24fSFrançois Tigeot * as a way to forward device-specific status records.
113a85cb24fSFrançois Tigeot *
114a85cb24fSFrançois Tigeot * The OA unit writes reports of counters into a circular buffer, without
115a85cb24fSFrançois Tigeot * involvement from the CPU, making our PMU driver the first of a kind.
116a85cb24fSFrançois Tigeot *
 * Given the way we were periodically forwarding data from the GPU-mapped OA
 * buffer to perf's buffer, those bursts of sample writes looked to perf like
 * we were sampling too fast and so we had to subvert its throttling checks.
120a85cb24fSFrançois Tigeot *
121a85cb24fSFrançois Tigeot * Perf supports groups of counters and allows those to be read via
122a85cb24fSFrançois Tigeot * transactions internally but transactions currently seem designed to be
123a85cb24fSFrançois Tigeot * explicitly initiated from the cpu (say in response to a userspace read())
124a85cb24fSFrançois Tigeot * and while we could pull a report out of the OA buffer we can't
125a85cb24fSFrançois Tigeot * trigger a report from the cpu on demand.
126a85cb24fSFrançois Tigeot *
127a85cb24fSFrançois Tigeot * Related to being report based; the OA counters are configured in HW as a
128a85cb24fSFrançois Tigeot * set while perf generally expects counter configurations to be orthogonal.
129a85cb24fSFrançois Tigeot * Although counters can be associated with a group leader as they are
130a85cb24fSFrançois Tigeot * opened, there's no clear precedent for being able to provide group-wide
131a85cb24fSFrançois Tigeot * configuration attributes (for example we want to let userspace choose the
132a85cb24fSFrançois Tigeot * OA unit report format used to capture all counters in a set, or specify a
133a85cb24fSFrançois Tigeot * GPU context to filter metrics on). We avoided using perf's grouping
134a85cb24fSFrançois Tigeot * feature and forwarded OA reports to userspace via perf's 'raw' sample
135a85cb24fSFrançois Tigeot * field. This suited our userspace well considering how coupled the counters
136a85cb24fSFrançois Tigeot * are when dealing with normalizing. It would be inconvenient to split
137a85cb24fSFrançois Tigeot * counters up into separate events, only to require userspace to recombine
138a85cb24fSFrançois Tigeot * them. For Mesa it's also convenient to be forwarded raw, periodic reports
139a85cb24fSFrançois Tigeot * for combining with the side-band raw reports it captures using
140a85cb24fSFrançois Tigeot * MI_REPORT_PERF_COUNT commands.
141a85cb24fSFrançois Tigeot *
142a85cb24fSFrançois Tigeot * - As a side note on perf's grouping feature; there was also some concern
143a85cb24fSFrançois Tigeot * that using PERF_FORMAT_GROUP as a way to pack together counter values
144a85cb24fSFrançois Tigeot * would quite drastically inflate our sample sizes, which would likely
145a85cb24fSFrançois Tigeot * lower the effective sampling resolutions we could use when the available
146a85cb24fSFrançois Tigeot * memory bandwidth is limited.
147a85cb24fSFrançois Tigeot *
148a85cb24fSFrançois Tigeot * With the OA unit's report formats, counters are packed together as 32
149a85cb24fSFrançois Tigeot * or 40bit values, with the largest report size being 256 bytes.
150a85cb24fSFrançois Tigeot *
151a85cb24fSFrançois Tigeot * PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a
152a85cb24fSFrançois Tigeot * documented ordering to the values, implying PERF_FORMAT_ID must also be
153a85cb24fSFrançois Tigeot * used to add a 64bit ID before each value; giving 16 bytes per counter.
154a85cb24fSFrançois Tigeot *
155a85cb24fSFrançois Tigeot * Related to counter orthogonality; we can't time share the OA unit, while
156a85cb24fSFrançois Tigeot * event scheduling is a central design idea within perf for allowing
157a85cb24fSFrançois Tigeot * userspace to open + enable more events than can be configured in HW at any
158a85cb24fSFrançois Tigeot * one time. The OA unit is not designed to allow re-configuration while in
159a85cb24fSFrançois Tigeot * use. We can't reconfigure the OA unit without losing internal OA unit
160a85cb24fSFrançois Tigeot * state which we can't access explicitly to save and restore. Reconfiguring
161a85cb24fSFrançois Tigeot * the OA unit is also relatively slow, involving ~100 register writes. From
162a85cb24fSFrançois Tigeot * userspace Mesa also depends on a stable OA configuration when emitting
163a85cb24fSFrançois Tigeot * MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be
164a85cb24fSFrançois Tigeot * disabled while there are outstanding MI_RPC commands lest we hang the
165a85cb24fSFrançois Tigeot * command streamer.
166a85cb24fSFrançois Tigeot *
167a85cb24fSFrançois Tigeot * The contents of sample records aren't extensible by device drivers (i.e.
168a85cb24fSFrançois Tigeot * the sample_type bits). As an example; Sourab Gupta had been looking to
169a85cb24fSFrançois Tigeot * attach GPU timestamps to our OA samples. We were shoehorning OA reports
170a85cb24fSFrançois Tigeot * into sample records by using the 'raw' field, but it's tricky to pack more
171a85cb24fSFrançois Tigeot * than one thing into this field because events/core.c currently only lets a
172a85cb24fSFrançois Tigeot * pmu give a single raw data pointer plus len which will be copied into the
173a85cb24fSFrançois Tigeot * ring buffer. To include more than the OA report we'd have to copy the
 * report into an intermediate larger buffer. I'd been considering allowing a
 * vector of data+len values to be specified for copying the raw data, but
 * it felt like a kludge to be using the raw field for this purpose.
177a85cb24fSFrançois Tigeot *
178a85cb24fSFrançois Tigeot * - It felt like our perf based PMU was making some technical compromises
179a85cb24fSFrançois Tigeot * just for the sake of using perf:
180a85cb24fSFrançois Tigeot *
181a85cb24fSFrançois Tigeot * perf_event_open() requires events to either relate to a pid or a specific
182a85cb24fSFrançois Tigeot * cpu core, while our device pmu related to neither. Events opened with a
183a85cb24fSFrançois Tigeot * pid will be automatically enabled/disabled according to the scheduling of
 * that process - so not appropriate for us. When an event is related to a
 * cpu id, perf ensures pmu methods will be invoked via an inter-processor
 * interrupt on that core. To avoid invasive changes our userspace opened OA
187a85cb24fSFrançois Tigeot * perf events for a specific cpu. This was workable but it meant the
188a85cb24fSFrançois Tigeot * majority of the OA driver ran in atomic context, including all OA report
189a85cb24fSFrançois Tigeot * forwarding, which wasn't really necessary in our case and seems to make
190a85cb24fSFrançois Tigeot * our locking requirements somewhat complex as we handled the interaction
191a85cb24fSFrançois Tigeot * with the rest of the i915 driver.
192a85cb24fSFrançois Tigeot */
193a85cb24fSFrançois Tigeot
194a85cb24fSFrançois Tigeot #include <linux/anon_inodes.h>
195a85cb24fSFrançois Tigeot #include <linux/sizes.h>
196*3f2dd94aSFrançois Tigeot #include <linux/uuid.h>
197a85cb24fSFrançois Tigeot
198a85cb24fSFrançois Tigeot #include "i915_drv.h"
199a85cb24fSFrançois Tigeot #include "i915_oa_hsw.h"
200*3f2dd94aSFrançois Tigeot #include "i915_oa_bdw.h"
201*3f2dd94aSFrançois Tigeot #include "i915_oa_chv.h"
202*3f2dd94aSFrançois Tigeot #include "i915_oa_sklgt2.h"
203*3f2dd94aSFrançois Tigeot #include "i915_oa_sklgt3.h"
204*3f2dd94aSFrançois Tigeot #include "i915_oa_sklgt4.h"
205*3f2dd94aSFrançois Tigeot #include "i915_oa_bxt.h"
206*3f2dd94aSFrançois Tigeot #include "i915_oa_kblgt2.h"
207*3f2dd94aSFrançois Tigeot #include "i915_oa_kblgt3.h"
208*3f2dd94aSFrançois Tigeot #include "i915_oa_glk.h"
209*3f2dd94aSFrançois Tigeot #include "i915_oa_cflgt2.h"
210a85cb24fSFrançois Tigeot
/* HW requires this to be a power of two, between 128k and 16M, though driver
 * is currently generally designed assuming the largest 16M size is used such
 * that the overflow cases are unlikely in normal operation.
 */
#define OA_BUFFER_SIZE		SZ_16M

/* Number of bytes between @head and @tail in the circular OA buffer, taken
 * modulo OA_BUFFER_SIZE (which must be a power of two for the mask to work).
 *
 * Both arguments are parenthesized so that compound argument expressions
 * (e.g. `a | b` or `cond ? x : y`) are fully evaluated before the
 * subtraction instead of being split by operator precedence.
 */
#define OA_TAKEN(tail, head)	(((tail) - (head)) & (OA_BUFFER_SIZE - 1))

/* There's a HW race condition between OA unit tail pointer register updates and
 * writes to memory whereby the tail pointer can sometimes get ahead of what's
 * been written out to the OA buffer so far.
 *
 * Although this can be observed explicitly by checking for a zeroed report-id
 * field in tail reports, it seems preferable to account for this earlier e.g.
 * as part of the _oa_buffer_is_empty checks to minimize -EAGAIN polling cycles
 * in this situation.
 *
 * To give time for the most recent reports to land before they may be copied to
 * userspace, the driver operates as if the tail pointer effectively lags behind
 * the HW tail pointer by 'tail_margin' bytes. The margin in bytes is calculated
 * based on this constant in nanoseconds, the current OA sampling exponent
 * and current report size.
 *
 * There is also a fallback check while reading to simply skip over reports with
 * a zeroed report-id.
 */
#define OA_TAIL_MARGIN_NSEC	100000ULL

/* Frequency (in Hz) for checking whether the OA unit has written new reports
 * to the circular OA buffer; POLL_PERIOD is the matching interval in
 * nanoseconds.
 */
#define POLL_FREQUENCY		200
#define POLL_PERIOD		(NSEC_PER_SEC / POLL_FREQUENCY)
244a85cb24fSFrançois Tigeot
245a85cb24fSFrançois Tigeot #if 0
246a85cb24fSFrançois Tigeot /* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
247a85cb24fSFrançois Tigeot static int zero;
248a85cb24fSFrançois Tigeot static int one = 1;
249a85cb24fSFrançois Tigeot static u32 i915_perf_stream_paranoid = true;
250a85cb24fSFrançois Tigeot
/* The hardware accepts exponents up to 63 (the exponent essentially selects
 * which of the 64 timestamp bits triggers a report), but there's currently no
 * known use case for sampling as infrequently as once per 47 thousand years.
 *
 * OA reports only carry 32bit timestamps, so it seems reasonable to cap the
 * exponent at a value where overflow of the report timestamps can still be
 * accounted for.
 */
#define OA_EXPONENT_MAX			31

#define INVALID_CTX_ID			0xffffffff

/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
#define OAREPORT_REASON_MASK		0x3f
#define OAREPORT_REASON_SHIFT		19
#define OAREPORT_REASON_TIMER		(1 << 0)
#define OAREPORT_REASON_CTX_SWITCH	(1 << 3)
#define OAREPORT_REASON_CLK_RATIO	(1 << 5)
269*3f2dd94aSFrançois Tigeot
270a85cb24fSFrançois Tigeot
271a85cb24fSFrançois Tigeot /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
272a85cb24fSFrançois Tigeot *
273*3f2dd94aSFrançois Tigeot * The highest sampling frequency we can theoretically program the OA unit
274*3f2dd94aSFrançois Tigeot * with is always half the timestamp frequency: E.g. 6.25Mhz for Haswell.
275*3f2dd94aSFrançois Tigeot *
276*3f2dd94aSFrançois Tigeot * Initialized just before we register the sysctl parameter.
277a85cb24fSFrançois Tigeot */
278*3f2dd94aSFrançois Tigeot static int oa_sample_rate_hard_limit;
279a85cb24fSFrançois Tigeot
280a85cb24fSFrançois Tigeot /* Theoretically we can program the OA unit to sample every 160ns but don't
281a85cb24fSFrançois Tigeot * allow that by default unless root...
282a85cb24fSFrançois Tigeot *
283a85cb24fSFrançois Tigeot * The default threshold of 100000Hz is based on perf's similar
284a85cb24fSFrançois Tigeot * kernel.perf_event_max_sample_rate sysctl parameter.
285a85cb24fSFrançois Tigeot */
286a85cb24fSFrançois Tigeot static u32 i915_oa_max_sample_rate = 100000;
287a85cb24fSFrançois Tigeot
288a85cb24fSFrançois Tigeot /* XXX: beware if future OA HW adds new report formats that the current
289a85cb24fSFrançois Tigeot * code assumes all reports have a power-of-two size and ~(size - 1) can
290a85cb24fSFrançois Tigeot * be used as a mask to align the OA tail pointer.
291a85cb24fSFrançois Tigeot */
292a85cb24fSFrançois Tigeot static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
293a85cb24fSFrançois Tigeot [I915_OA_FORMAT_A13] = { 0, 64 },
294a85cb24fSFrançois Tigeot [I915_OA_FORMAT_A29] = { 1, 128 },
295a85cb24fSFrançois Tigeot [I915_OA_FORMAT_A13_B8_C8] = { 2, 128 },
296a85cb24fSFrançois Tigeot /* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
297a85cb24fSFrançois Tigeot [I915_OA_FORMAT_B4_C8] = { 4, 64 },
298a85cb24fSFrançois Tigeot [I915_OA_FORMAT_A45_B8_C8] = { 5, 256 },
299a85cb24fSFrançois Tigeot [I915_OA_FORMAT_B4_C8_A16] = { 6, 128 },
300a85cb24fSFrançois Tigeot [I915_OA_FORMAT_C4_B8] = { 7, 64 },
301a85cb24fSFrançois Tigeot };
302*3f2dd94aSFrançois Tigeot
303*3f2dd94aSFrançois Tigeot static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
304*3f2dd94aSFrançois Tigeot [I915_OA_FORMAT_A12] = { 0, 64 },
305*3f2dd94aSFrançois Tigeot [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 },
306*3f2dd94aSFrançois Tigeot [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
307*3f2dd94aSFrançois Tigeot [I915_OA_FORMAT_C4_B8] = { 7, 64 },
308*3f2dd94aSFrançois Tigeot };
309a85cb24fSFrançois Tigeot #endif
310a85cb24fSFrançois Tigeot
311a85cb24fSFrançois Tigeot #define SAMPLE_OA_REPORT (1<<0)
312a85cb24fSFrançois Tigeot
313a85cb24fSFrançois Tigeot /**
314a85cb24fSFrançois Tigeot * struct perf_open_properties - for validated properties given to open a stream
315a85cb24fSFrançois Tigeot * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
316a85cb24fSFrançois Tigeot * @single_context: Whether a single or all gpu contexts should be monitored
317a85cb24fSFrançois Tigeot * @ctx_handle: A gem ctx handle for use with @single_context
318a85cb24fSFrançois Tigeot * @metrics_set: An ID for an OA unit metric set advertised via sysfs
319a85cb24fSFrançois Tigeot * @oa_format: An OA unit HW report format
320a85cb24fSFrançois Tigeot * @oa_periodic: Whether to enable periodic OA unit sampling
321a85cb24fSFrançois Tigeot * @oa_period_exponent: The OA unit sampling period is derived from this
322a85cb24fSFrançois Tigeot *
323a85cb24fSFrançois Tigeot * As read_properties_unlocked() enumerates and validates the properties given
324a85cb24fSFrançois Tigeot * to open a stream of metrics the configuration is built up in the structure
325a85cb24fSFrançois Tigeot * which starts out zero initialized.
326a85cb24fSFrançois Tigeot */
327a85cb24fSFrançois Tigeot struct perf_open_properties {
328a85cb24fSFrançois Tigeot u32 sample_flags;
329a85cb24fSFrançois Tigeot
330a85cb24fSFrançois Tigeot u64 single_context:1;
331a85cb24fSFrançois Tigeot u64 ctx_handle;
332a85cb24fSFrançois Tigeot
333a85cb24fSFrançois Tigeot /* OA sampling state */
334a85cb24fSFrançois Tigeot int metrics_set;
335a85cb24fSFrançois Tigeot int oa_format;
336a85cb24fSFrançois Tigeot bool oa_periodic;
337a85cb24fSFrançois Tigeot int oa_period_exponent;
338a85cb24fSFrançois Tigeot };
339a85cb24fSFrançois Tigeot
340a85cb24fSFrançois Tigeot #if 0
341a85cb24fSFrançois Tigeot /* NB: This is either called via fops or the poll check hrtimer (atomic ctx)
342a85cb24fSFrançois Tigeot *
343a85cb24fSFrançois Tigeot * It's safe to read OA config state here unlocked, assuming that this is only
344a85cb24fSFrançois Tigeot * called while the stream is enabled, while the global OA configuration can't
345a85cb24fSFrançois Tigeot * be modified.
346a85cb24fSFrançois Tigeot *
347a85cb24fSFrançois Tigeot * Note: we don't lock around the head/tail reads even though there's the slim
348a85cb24fSFrançois Tigeot * possibility of read() fop errors forcing a re-init of the OA buffer
349a85cb24fSFrançois Tigeot * pointers. A race here could result in a false positive !empty status which
350a85cb24fSFrançois Tigeot * is acceptable.
351a85cb24fSFrançois Tigeot */
352a85cb24fSFrançois Tigeot static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv)
353a85cb24fSFrançois Tigeot {
354a85cb24fSFrançois Tigeot int report_size = dev_priv->perf.oa.oa_buffer.format_size;
355a85cb24fSFrançois Tigeot u32 oastatus2 = I915_READ(GEN7_OASTATUS2);
356a85cb24fSFrançois Tigeot u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
357a85cb24fSFrançois Tigeot u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
358a85cb24fSFrançois Tigeot u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
359a85cb24fSFrançois Tigeot
360a85cb24fSFrançois Tigeot return OA_TAKEN(tail, head) <
361a85cb24fSFrançois Tigeot dev_priv->perf.oa.tail_margin + report_size;
362a85cb24fSFrançois Tigeot }
363a85cb24fSFrançois Tigeot
364a85cb24fSFrançois Tigeot /**
365a85cb24fSFrançois Tigeot * append_oa_status - Appends a status record to a userspace read() buffer.
366a85cb24fSFrançois Tigeot * @stream: An i915-perf stream opened for OA metrics
367a85cb24fSFrançois Tigeot * @buf: destination buffer given by userspace
368a85cb24fSFrançois Tigeot * @count: the number of bytes userspace wants to read
369a85cb24fSFrançois Tigeot * @offset: (inout): the current position for writing into @buf
370a85cb24fSFrançois Tigeot * @type: The kind of status to report to userspace
371a85cb24fSFrançois Tigeot *
372a85cb24fSFrançois Tigeot * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
373a85cb24fSFrançois Tigeot * into the userspace read() buffer.
374a85cb24fSFrançois Tigeot *
375a85cb24fSFrançois Tigeot * The @buf @offset will only be updated on success.
376a85cb24fSFrançois Tigeot *
377a85cb24fSFrançois Tigeot * Returns: 0 on success, negative error code on failure.
378a85cb24fSFrançois Tigeot */
379a85cb24fSFrançois Tigeot static int append_oa_status(struct i915_perf_stream *stream,
380a85cb24fSFrançois Tigeot char __user *buf,
381a85cb24fSFrançois Tigeot size_t count,
382a85cb24fSFrançois Tigeot size_t *offset,
383a85cb24fSFrançois Tigeot enum drm_i915_perf_record_type type)
384a85cb24fSFrançois Tigeot {
385a85cb24fSFrançois Tigeot struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };
386a85cb24fSFrançois Tigeot
387a85cb24fSFrançois Tigeot if ((count - *offset) < header.size)
388a85cb24fSFrançois Tigeot return -ENOSPC;
389a85cb24fSFrançois Tigeot
390a85cb24fSFrançois Tigeot if (copy_to_user(buf + *offset, &header, sizeof(header)))
391a85cb24fSFrançois Tigeot return -EFAULT;
392a85cb24fSFrançois Tigeot
393a85cb24fSFrançois Tigeot (*offset) += header.size;
394a85cb24fSFrançois Tigeot
395a85cb24fSFrançois Tigeot return 0;
396a85cb24fSFrançois Tigeot }
397a85cb24fSFrançois Tigeot
398a85cb24fSFrançois Tigeot /**
399a85cb24fSFrançois Tigeot * append_oa_sample - Copies single OA report into userspace read() buffer.
400a85cb24fSFrançois Tigeot * @stream: An i915-perf stream opened for OA metrics
401a85cb24fSFrançois Tigeot * @buf: destination buffer given by userspace
402a85cb24fSFrançois Tigeot * @count: the number of bytes userspace wants to read
403a85cb24fSFrançois Tigeot * @offset: (inout): the current position for writing into @buf
404a85cb24fSFrançois Tigeot * @report: A single OA report to (optionally) include as part of the sample
405a85cb24fSFrançois Tigeot *
406a85cb24fSFrançois Tigeot * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
407a85cb24fSFrançois Tigeot * properties when opening a stream, tracked as `stream->sample_flags`. This
408a85cb24fSFrançois Tigeot * function copies the requested components of a single sample to the given
409a85cb24fSFrançois Tigeot * read() @buf.
410a85cb24fSFrançois Tigeot *
411a85cb24fSFrançois Tigeot * The @buf @offset will only be updated on success.
412a85cb24fSFrançois Tigeot *
413a85cb24fSFrançois Tigeot * Returns: 0 on success, negative error code on failure.
414a85cb24fSFrançois Tigeot */
415a85cb24fSFrançois Tigeot static int append_oa_sample(struct i915_perf_stream *stream,
416a85cb24fSFrançois Tigeot char __user *buf,
417a85cb24fSFrançois Tigeot size_t count,
418a85cb24fSFrançois Tigeot size_t *offset,
419a85cb24fSFrançois Tigeot const u8 *report)
420a85cb24fSFrançois Tigeot {
421a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
422a85cb24fSFrançois Tigeot int report_size = dev_priv->perf.oa.oa_buffer.format_size;
423a85cb24fSFrançois Tigeot struct drm_i915_perf_record_header header;
424a85cb24fSFrançois Tigeot u32 sample_flags = stream->sample_flags;
425a85cb24fSFrançois Tigeot
426a85cb24fSFrançois Tigeot header.type = DRM_I915_PERF_RECORD_SAMPLE;
427a85cb24fSFrançois Tigeot header.pad = 0;
428a85cb24fSFrançois Tigeot header.size = stream->sample_size;
429a85cb24fSFrançois Tigeot
430a85cb24fSFrançois Tigeot if ((count - *offset) < header.size)
431a85cb24fSFrançois Tigeot return -ENOSPC;
432a85cb24fSFrançois Tigeot
433a85cb24fSFrançois Tigeot buf += *offset;
434a85cb24fSFrançois Tigeot if (copy_to_user(buf, &header, sizeof(header)))
435a85cb24fSFrançois Tigeot return -EFAULT;
436a85cb24fSFrançois Tigeot buf += sizeof(header);
437a85cb24fSFrançois Tigeot
438a85cb24fSFrançois Tigeot if (sample_flags & SAMPLE_OA_REPORT) {
439a85cb24fSFrançois Tigeot if (copy_to_user(buf, report, report_size))
440a85cb24fSFrançois Tigeot return -EFAULT;
441a85cb24fSFrançois Tigeot }
442a85cb24fSFrançois Tigeot
443a85cb24fSFrançois Tigeot (*offset) += header.size;
444a85cb24fSFrançois Tigeot
445a85cb24fSFrançois Tigeot return 0;
446a85cb24fSFrançois Tigeot }
447a85cb24fSFrançois Tigeot
448a85cb24fSFrançois Tigeot /**
449a85cb24fSFrançois Tigeot * Copies all buffered OA reports into userspace read() buffer.
450a85cb24fSFrançois Tigeot * @stream: An i915-perf stream opened for OA metrics
451a85cb24fSFrançois Tigeot * @buf: destination buffer given by userspace
452a85cb24fSFrançois Tigeot * @count: the number of bytes userspace wants to read
453a85cb24fSFrançois Tigeot * @offset: (inout): the current position for writing into @buf
454*3f2dd94aSFrançois Tigeot *
455*3f2dd94aSFrançois Tigeot * Notably any error condition resulting in a short read (-%ENOSPC or
456*3f2dd94aSFrançois Tigeot * -%EFAULT) will be returned even though one or more records may
457*3f2dd94aSFrançois Tigeot * have been successfully copied. In this case it's up to the caller
458*3f2dd94aSFrançois Tigeot * to decide if the error should be squashed before returning to
459*3f2dd94aSFrançois Tigeot * userspace.
460*3f2dd94aSFrançois Tigeot *
461*3f2dd94aSFrançois Tigeot * Note: reports are consumed from the head, and appended to the
462*3f2dd94aSFrançois Tigeot * tail, so the tail chases the head?... If you think that's mad
463*3f2dd94aSFrançois Tigeot * and back-to-front you're not alone, but this follows the
464*3f2dd94aSFrançois Tigeot * Gen PRM naming convention.
465*3f2dd94aSFrançois Tigeot *
466*3f2dd94aSFrançois Tigeot * Returns: 0 on success, negative error code on failure.
467*3f2dd94aSFrançois Tigeot */
468*3f2dd94aSFrançois Tigeot static int gen8_append_oa_reports(struct i915_perf_stream *stream,
469*3f2dd94aSFrançois Tigeot char __user *buf,
470*3f2dd94aSFrançois Tigeot size_t count,
471*3f2dd94aSFrançois Tigeot size_t *offset)
472*3f2dd94aSFrançois Tigeot {
473*3f2dd94aSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
474*3f2dd94aSFrançois Tigeot int report_size = dev_priv->perf.oa.oa_buffer.format_size;
475*3f2dd94aSFrançois Tigeot u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
476*3f2dd94aSFrançois Tigeot u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
477*3f2dd94aSFrançois Tigeot u32 mask = (OA_BUFFER_SIZE - 1);
478*3f2dd94aSFrançois Tigeot size_t start_offset = *offset;
479*3f2dd94aSFrançois Tigeot unsigned long flags;
480*3f2dd94aSFrançois Tigeot unsigned int aged_tail_idx;
481*3f2dd94aSFrançois Tigeot u32 head, tail;
482*3f2dd94aSFrançois Tigeot u32 taken;
483*3f2dd94aSFrançois Tigeot int ret = 0;
484*3f2dd94aSFrançois Tigeot
485*3f2dd94aSFrançois Tigeot if (WARN_ON(!stream->enabled))
486*3f2dd94aSFrançois Tigeot return -EIO;
487*3f2dd94aSFrançois Tigeot
488*3f2dd94aSFrançois Tigeot spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
489*3f2dd94aSFrançois Tigeot
490*3f2dd94aSFrançois Tigeot head = dev_priv->perf.oa.oa_buffer.head;
491*3f2dd94aSFrançois Tigeot aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
492*3f2dd94aSFrançois Tigeot tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
493*3f2dd94aSFrançois Tigeot
494*3f2dd94aSFrançois Tigeot spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
495*3f2dd94aSFrançois Tigeot
496*3f2dd94aSFrançois Tigeot /*
497*3f2dd94aSFrançois Tigeot * An invalid tail pointer here means we're still waiting for the poll
498*3f2dd94aSFrançois Tigeot * hrtimer callback to give us a pointer
499*3f2dd94aSFrançois Tigeot */
500*3f2dd94aSFrançois Tigeot if (tail == INVALID_TAIL_PTR)
501*3f2dd94aSFrançois Tigeot return -EAGAIN;
502*3f2dd94aSFrançois Tigeot
503*3f2dd94aSFrançois Tigeot /*
504*3f2dd94aSFrançois Tigeot * NB: oa_buffer.head/tail include the gtt_offset which we don't want
505*3f2dd94aSFrançois Tigeot * while indexing relative to oa_buf_base.
506*3f2dd94aSFrançois Tigeot */
507*3f2dd94aSFrançois Tigeot head -= gtt_offset;
508*3f2dd94aSFrançois Tigeot tail -= gtt_offset;
509*3f2dd94aSFrançois Tigeot
510*3f2dd94aSFrançois Tigeot /*
511*3f2dd94aSFrançois Tigeot * An out of bounds or misaligned head or tail pointer implies a driver
512*3f2dd94aSFrançois Tigeot * bug since we validate + align the tail pointers we read from the
513*3f2dd94aSFrançois Tigeot * hardware and we are in full control of the head pointer which should
514*3f2dd94aSFrançois Tigeot * only be incremented by multiples of the report size (notably also
515*3f2dd94aSFrançois Tigeot * all a power of two).
516*3f2dd94aSFrançois Tigeot */
517*3f2dd94aSFrançois Tigeot if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
518*3f2dd94aSFrançois Tigeot tail > OA_BUFFER_SIZE || tail % report_size,
519*3f2dd94aSFrançois Tigeot "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
520*3f2dd94aSFrançois Tigeot head, tail))
521*3f2dd94aSFrançois Tigeot return -EIO;
522*3f2dd94aSFrançois Tigeot
523*3f2dd94aSFrançois Tigeot
524*3f2dd94aSFrançois Tigeot for (/* none */;
525*3f2dd94aSFrançois Tigeot (taken = OA_TAKEN(tail, head));
526*3f2dd94aSFrançois Tigeot head = (head + report_size) & mask) {
527*3f2dd94aSFrançois Tigeot u8 *report = oa_buf_base + head;
528*3f2dd94aSFrançois Tigeot u32 *report32 = (void *)report;
529*3f2dd94aSFrançois Tigeot u32 ctx_id;
530*3f2dd94aSFrançois Tigeot u32 reason;
531*3f2dd94aSFrançois Tigeot
532*3f2dd94aSFrançois Tigeot /*
533*3f2dd94aSFrançois Tigeot * All the report sizes factor neatly into the buffer
534*3f2dd94aSFrançois Tigeot * size so we never expect to see a report split
535*3f2dd94aSFrançois Tigeot * between the beginning and end of the buffer.
536*3f2dd94aSFrançois Tigeot *
537*3f2dd94aSFrançois Tigeot * Given the initial alignment check a misalignment
538*3f2dd94aSFrançois Tigeot * here would imply a driver bug that would result
539*3f2dd94aSFrançois Tigeot * in an overrun.
540*3f2dd94aSFrançois Tigeot */
541*3f2dd94aSFrançois Tigeot if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
542*3f2dd94aSFrançois Tigeot DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
543*3f2dd94aSFrançois Tigeot break;
544*3f2dd94aSFrançois Tigeot }
545*3f2dd94aSFrançois Tigeot
546*3f2dd94aSFrançois Tigeot /*
547*3f2dd94aSFrançois Tigeot * The reason field includes flags identifying what
548*3f2dd94aSFrançois Tigeot * triggered this specific report (mostly timer
549*3f2dd94aSFrançois Tigeot * triggered or e.g. due to a context switch).
550*3f2dd94aSFrançois Tigeot *
551*3f2dd94aSFrançois Tigeot * This field is never expected to be zero so we can
552*3f2dd94aSFrançois Tigeot * check that the report isn't invalid before copying
553*3f2dd94aSFrançois Tigeot * it to userspace...
554*3f2dd94aSFrançois Tigeot */
555*3f2dd94aSFrançois Tigeot reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
556*3f2dd94aSFrançois Tigeot OAREPORT_REASON_MASK);
557*3f2dd94aSFrançois Tigeot if (reason == 0) {
558*3f2dd94aSFrançois Tigeot if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
559*3f2dd94aSFrançois Tigeot DRM_NOTE("Skipping spurious, invalid OA report\n");
560*3f2dd94aSFrançois Tigeot continue;
561*3f2dd94aSFrançois Tigeot }
562*3f2dd94aSFrançois Tigeot
563*3f2dd94aSFrançois Tigeot /*
564*3f2dd94aSFrançois Tigeot * XXX: Just keep the lower 21 bits for now since I'm not
565*3f2dd94aSFrançois Tigeot * entirely sure if the HW touches any of the higher bits in
566*3f2dd94aSFrançois Tigeot * this field
567*3f2dd94aSFrançois Tigeot */
568*3f2dd94aSFrançois Tigeot ctx_id = report32[2] & 0x1fffff;
569*3f2dd94aSFrançois Tigeot
570*3f2dd94aSFrançois Tigeot /*
571*3f2dd94aSFrançois Tigeot * Squash whatever is in the CTX_ID field if it's marked as
572*3f2dd94aSFrançois Tigeot * invalid to be sure we avoid false-positive, single-context
573*3f2dd94aSFrançois Tigeot * filtering below...
574*3f2dd94aSFrançois Tigeot *
575*3f2dd94aSFrançois Tigeot * Note: that we don't clear the valid_ctx_bit so userspace can
576*3f2dd94aSFrançois Tigeot * understand that the ID has been squashed by the kernel.
577*3f2dd94aSFrançois Tigeot */
578*3f2dd94aSFrançois Tigeot if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
579*3f2dd94aSFrançois Tigeot ctx_id = report32[2] = INVALID_CTX_ID;
580*3f2dd94aSFrançois Tigeot
581*3f2dd94aSFrançois Tigeot /*
582*3f2dd94aSFrançois Tigeot * NB: For Gen 8 the OA unit no longer supports clock gating
583*3f2dd94aSFrançois Tigeot * off for a specific context and the kernel can't securely
584*3f2dd94aSFrançois Tigeot * stop the counters from updating as system-wide / global
585*3f2dd94aSFrançois Tigeot * values.
586*3f2dd94aSFrançois Tigeot *
587*3f2dd94aSFrançois Tigeot * Automatic reports now include a context ID so reports can be
588*3f2dd94aSFrançois Tigeot * filtered on the cpu but it's not worth trying to
589*3f2dd94aSFrançois Tigeot * automatically subtract/hide counter progress for other
590*3f2dd94aSFrançois Tigeot * contexts while filtering since we can't stop userspace
591*3f2dd94aSFrançois Tigeot * issuing MI_REPORT_PERF_COUNT commands which would still
592*3f2dd94aSFrançois Tigeot * provide a side-band view of the real values.
593*3f2dd94aSFrançois Tigeot *
594*3f2dd94aSFrançois Tigeot * To allow userspace (such as Mesa/GL_INTEL_performance_query)
595*3f2dd94aSFrançois Tigeot * to normalize counters for a single filtered context then it
596*3f2dd94aSFrançois Tigeot * needs be forwarded bookend context-switch reports so that it
597*3f2dd94aSFrançois Tigeot * can track switches in between MI_REPORT_PERF_COUNT commands
598*3f2dd94aSFrançois Tigeot * and can itself subtract/ignore the progress of counters
599*3f2dd94aSFrançois Tigeot * associated with other contexts. Note that the hardware
600*3f2dd94aSFrançois Tigeot * automatically triggers reports when switching to a new
601*3f2dd94aSFrançois Tigeot * context which are tagged with the ID of the newly active
602*3f2dd94aSFrançois Tigeot * context. To avoid the complexity (and likely fragility) of
603*3f2dd94aSFrançois Tigeot * reading ahead while parsing reports to try and minimize
604*3f2dd94aSFrançois Tigeot * forwarding redundant context switch reports (i.e. between
605*3f2dd94aSFrançois Tigeot * other, unrelated contexts) we simply elect to forward them
606*3f2dd94aSFrançois Tigeot * all.
607*3f2dd94aSFrançois Tigeot *
608*3f2dd94aSFrançois Tigeot * We don't rely solely on the reason field to identify context
609*3f2dd94aSFrançois Tigeot * switches since it's not-uncommon for periodic samples to
610*3f2dd94aSFrançois Tigeot * identify a switch before any 'context switch' report.
611*3f2dd94aSFrançois Tigeot */
612*3f2dd94aSFrançois Tigeot if (!dev_priv->perf.oa.exclusive_stream->ctx ||
613*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.specific_ctx_id == ctx_id ||
614*3f2dd94aSFrançois Tigeot (dev_priv->perf.oa.oa_buffer.last_ctx_id ==
615*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.specific_ctx_id) ||
616*3f2dd94aSFrançois Tigeot reason & OAREPORT_REASON_CTX_SWITCH) {
617*3f2dd94aSFrançois Tigeot
618*3f2dd94aSFrançois Tigeot /*
619*3f2dd94aSFrançois Tigeot * While filtering for a single context we avoid
620*3f2dd94aSFrançois Tigeot * leaking the IDs of other contexts.
621*3f2dd94aSFrançois Tigeot */
622*3f2dd94aSFrançois Tigeot if (dev_priv->perf.oa.exclusive_stream->ctx &&
623*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.specific_ctx_id != ctx_id) {
624*3f2dd94aSFrançois Tigeot report32[2] = INVALID_CTX_ID;
625*3f2dd94aSFrançois Tigeot }
626*3f2dd94aSFrançois Tigeot
627*3f2dd94aSFrançois Tigeot ret = append_oa_sample(stream, buf, count, offset,
628*3f2dd94aSFrançois Tigeot report);
629*3f2dd94aSFrançois Tigeot if (ret)
630*3f2dd94aSFrançois Tigeot break;
631*3f2dd94aSFrançois Tigeot
632*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
633*3f2dd94aSFrançois Tigeot }
634*3f2dd94aSFrançois Tigeot
635*3f2dd94aSFrançois Tigeot /*
636*3f2dd94aSFrançois Tigeot * The above reason field sanity check is based on
637*3f2dd94aSFrançois Tigeot * the assumption that the OA buffer is initially
638*3f2dd94aSFrançois Tigeot * zeroed and we reset the field after copying so the
639*3f2dd94aSFrançois Tigeot * check is still meaningful once old reports start
640*3f2dd94aSFrançois Tigeot * being overwritten.
641*3f2dd94aSFrançois Tigeot */
642*3f2dd94aSFrançois Tigeot report32[0] = 0;
643*3f2dd94aSFrançois Tigeot }
644*3f2dd94aSFrançois Tigeot
645*3f2dd94aSFrançois Tigeot if (start_offset != *offset) {
646*3f2dd94aSFrançois Tigeot spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
647*3f2dd94aSFrançois Tigeot
648*3f2dd94aSFrançois Tigeot /*
649*3f2dd94aSFrançois Tigeot * We removed the gtt_offset for the copy loop above, indexing
650*3f2dd94aSFrançois Tigeot * relative to oa_buf_base so put back here...
651*3f2dd94aSFrançois Tigeot */
652*3f2dd94aSFrançois Tigeot head += gtt_offset;
653*3f2dd94aSFrançois Tigeot
654*3f2dd94aSFrançois Tigeot I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
655*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.oa_buffer.head = head;
656*3f2dd94aSFrançois Tigeot
657*3f2dd94aSFrançois Tigeot spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
658*3f2dd94aSFrançois Tigeot }
659*3f2dd94aSFrançois Tigeot
660*3f2dd94aSFrançois Tigeot return ret;
661*3f2dd94aSFrançois Tigeot }
662*3f2dd94aSFrançois Tigeot
663*3f2dd94aSFrançois Tigeot /**
664*3f2dd94aSFrançois Tigeot * gen8_oa_read - copy status records then buffered OA reports
665*3f2dd94aSFrançois Tigeot * @stream: An i915-perf stream opened for OA metrics
666*3f2dd94aSFrançois Tigeot * @buf: destination buffer given by userspace
667*3f2dd94aSFrançois Tigeot * @count: the number of bytes userspace wants to read
668*3f2dd94aSFrançois Tigeot * @offset: (inout): the current position for writing into @buf
669*3f2dd94aSFrançois Tigeot *
670*3f2dd94aSFrançois Tigeot * Checks OA unit status registers and if necessary appends corresponding
671*3f2dd94aSFrançois Tigeot * status records for userspace (such as for a buffer full condition) and then
672*3f2dd94aSFrançois Tigeot * initiate appending any buffered OA reports.
673*3f2dd94aSFrançois Tigeot *
674*3f2dd94aSFrançois Tigeot * Updates @offset according to the number of bytes successfully copied into
675*3f2dd94aSFrançois Tigeot * the userspace buffer.
676*3f2dd94aSFrançois Tigeot *
677*3f2dd94aSFrançois Tigeot * NB: some data may be successfully copied to the userspace buffer
678*3f2dd94aSFrançois Tigeot * even if an error is returned, and this is reflected in the
679*3f2dd94aSFrançois Tigeot * updated @offset.
680*3f2dd94aSFrançois Tigeot *
681*3f2dd94aSFrançois Tigeot * Returns: zero on success or a negative error code
682*3f2dd94aSFrançois Tigeot */
683*3f2dd94aSFrançois Tigeot static int gen8_oa_read(struct i915_perf_stream *stream,
684*3f2dd94aSFrançois Tigeot char __user *buf,
685*3f2dd94aSFrançois Tigeot size_t count,
686*3f2dd94aSFrançois Tigeot size_t *offset)
687*3f2dd94aSFrançois Tigeot {
688*3f2dd94aSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
689*3f2dd94aSFrançois Tigeot u32 oastatus;
690*3f2dd94aSFrançois Tigeot int ret;
691*3f2dd94aSFrançois Tigeot
692*3f2dd94aSFrançois Tigeot if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
693*3f2dd94aSFrançois Tigeot return -EIO;
694*3f2dd94aSFrançois Tigeot
695*3f2dd94aSFrançois Tigeot oastatus = I915_READ(GEN8_OASTATUS);
696*3f2dd94aSFrançois Tigeot
697*3f2dd94aSFrançois Tigeot /*
698*3f2dd94aSFrançois Tigeot * We treat OABUFFER_OVERFLOW as a significant error:
699*3f2dd94aSFrançois Tigeot *
700*3f2dd94aSFrançois Tigeot * Although theoretically we could handle this more gracefully
701*3f2dd94aSFrançois Tigeot * sometimes, some Gens don't correctly suppress certain
702*3f2dd94aSFrançois Tigeot * automatically triggered reports in this condition and so we
703*3f2dd94aSFrançois Tigeot * have to assume that old reports are now being trampled
704*3f2dd94aSFrançois Tigeot * over.
705*3f2dd94aSFrançois Tigeot *
706*3f2dd94aSFrançois Tigeot * Considering how we don't currently give userspace control
707*3f2dd94aSFrançois Tigeot * over the OA buffer size and always configure a large 16MB
708*3f2dd94aSFrançois Tigeot * buffer, then a buffer overflow does anyway likely indicate
709*3f2dd94aSFrançois Tigeot * that something has gone quite badly wrong.
710*3f2dd94aSFrançois Tigeot */
711*3f2dd94aSFrançois Tigeot if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
712*3f2dd94aSFrançois Tigeot ret = append_oa_status(stream, buf, count, offset,
713*3f2dd94aSFrançois Tigeot DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
714*3f2dd94aSFrançois Tigeot if (ret)
715*3f2dd94aSFrançois Tigeot return ret;
716*3f2dd94aSFrançois Tigeot
717*3f2dd94aSFrançois Tigeot DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
718*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.period_exponent);
719*3f2dd94aSFrançois Tigeot
720*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.oa_disable(dev_priv);
721*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.oa_enable(dev_priv);
722*3f2dd94aSFrançois Tigeot
723*3f2dd94aSFrançois Tigeot /*
724*3f2dd94aSFrançois Tigeot * Note: .oa_enable() is expected to re-init the oabuffer and
725*3f2dd94aSFrançois Tigeot * reset GEN8_OASTATUS for us
726*3f2dd94aSFrançois Tigeot */
727*3f2dd94aSFrançois Tigeot oastatus = I915_READ(GEN8_OASTATUS);
728*3f2dd94aSFrançois Tigeot }
729*3f2dd94aSFrançois Tigeot
730*3f2dd94aSFrançois Tigeot if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
731*3f2dd94aSFrançois Tigeot ret = append_oa_status(stream, buf, count, offset,
732*3f2dd94aSFrançois Tigeot DRM_I915_PERF_RECORD_OA_REPORT_LOST);
733*3f2dd94aSFrançois Tigeot if (ret)
734*3f2dd94aSFrançois Tigeot return ret;
735*3f2dd94aSFrançois Tigeot I915_WRITE(GEN8_OASTATUS,
736*3f2dd94aSFrançois Tigeot oastatus & ~GEN8_OASTATUS_REPORT_LOST);
737*3f2dd94aSFrançois Tigeot }
738*3f2dd94aSFrançois Tigeot
739*3f2dd94aSFrançois Tigeot return gen8_append_oa_reports(stream, buf, count, offset);
740*3f2dd94aSFrançois Tigeot }
741*3f2dd94aSFrançois Tigeot
742*3f2dd94aSFrançois Tigeot /**
743*3f2dd94aSFrançois Tigeot * Copies all buffered OA reports into userspace read() buffer.
744*3f2dd94aSFrançois Tigeot * @stream: An i915-perf stream opened for OA metrics
745*3f2dd94aSFrançois Tigeot * @buf: destination buffer given by userspace
746*3f2dd94aSFrançois Tigeot * @count: the number of bytes userspace wants to read
747*3f2dd94aSFrançois Tigeot * @offset: (inout): the current position for writing into @buf
748a85cb24fSFrançois Tigeot * @head_ptr: (inout): the current oa buffer cpu read position
749a85cb24fSFrançois Tigeot * @tail: the current oa buffer gpu write position
750a85cb24fSFrançois Tigeot *
751a85cb24fSFrançois Tigeot * Notably any error condition resulting in a short read (-%ENOSPC or
752a85cb24fSFrançois Tigeot * -%EFAULT) will be returned even though one or more records may
753a85cb24fSFrançois Tigeot * have been successfully copied. In this case it's up to the caller
754a85cb24fSFrançois Tigeot * to decide if the error should be squashed before returning to
755a85cb24fSFrançois Tigeot * userspace.
756a85cb24fSFrançois Tigeot *
757a85cb24fSFrançois Tigeot * Note: reports are consumed from the head, and appended to the
758a85cb24fSFrançois Tigeot * tail, so the head chases the tail?... If you think that's mad
759a85cb24fSFrançois Tigeot * and back-to-front you're not alone, but this follows the
760a85cb24fSFrançois Tigeot * Gen PRM naming convention.
761a85cb24fSFrançois Tigeot *
762a85cb24fSFrançois Tigeot * Returns: 0 on success, negative error code on failure.
763a85cb24fSFrançois Tigeot */
764a85cb24fSFrançois Tigeot static int gen7_append_oa_reports(struct i915_perf_stream *stream,
765a85cb24fSFrançois Tigeot char __user *buf,
766a85cb24fSFrançois Tigeot size_t count,
767a85cb24fSFrançois Tigeot size_t *offset,
768a85cb24fSFrançois Tigeot u32 *head_ptr,
769a85cb24fSFrançois Tigeot u32 tail)
770a85cb24fSFrançois Tigeot {
771a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
772a85cb24fSFrançois Tigeot int report_size = dev_priv->perf.oa.oa_buffer.format_size;
773a85cb24fSFrançois Tigeot u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
774a85cb24fSFrançois Tigeot int tail_margin = dev_priv->perf.oa.tail_margin;
775a85cb24fSFrançois Tigeot u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
776a85cb24fSFrançois Tigeot u32 mask = (OA_BUFFER_SIZE - 1);
777a85cb24fSFrançois Tigeot u32 head;
778a85cb24fSFrançois Tigeot u32 taken;
779a85cb24fSFrançois Tigeot int ret = 0;
780a85cb24fSFrançois Tigeot
781a85cb24fSFrançois Tigeot if (WARN_ON(!stream->enabled))
782a85cb24fSFrançois Tigeot return -EIO;
783a85cb24fSFrançois Tigeot
784a85cb24fSFrançois Tigeot head = *head_ptr - gtt_offset;
785a85cb24fSFrançois Tigeot tail -= gtt_offset;
786a85cb24fSFrançois Tigeot
787a85cb24fSFrançois Tigeot /* The OA unit is expected to wrap the tail pointer according to the OA
788a85cb24fSFrançois Tigeot * buffer size and since we should never write a misaligned head
789a85cb24fSFrançois Tigeot * pointer we don't expect to read one back either...
790a85cb24fSFrançois Tigeot */
791a85cb24fSFrançois Tigeot if (tail > OA_BUFFER_SIZE || head > OA_BUFFER_SIZE ||
792a85cb24fSFrançois Tigeot head % report_size) {
793a85cb24fSFrançois Tigeot DRM_ERROR("Inconsistent OA buffer pointer (head = %u, tail = %u): force restart\n",
794a85cb24fSFrançois Tigeot head, tail);
795a85cb24fSFrançois Tigeot dev_priv->perf.oa.ops.oa_disable(dev_priv);
796a85cb24fSFrançois Tigeot dev_priv->perf.oa.ops.oa_enable(dev_priv);
797a85cb24fSFrançois Tigeot *head_ptr = I915_READ(GEN7_OASTATUS2) &
798a85cb24fSFrançois Tigeot GEN7_OASTATUS2_HEAD_MASK;
799a85cb24fSFrançois Tigeot return -EIO;
800a85cb24fSFrançois Tigeot }
801a85cb24fSFrançois Tigeot
802a85cb24fSFrançois Tigeot
803a85cb24fSFrançois Tigeot /* The tail pointer increases in 64 byte increments, not in report_size
804a85cb24fSFrançois Tigeot * steps...
805a85cb24fSFrançois Tigeot */
806a85cb24fSFrançois Tigeot tail &= ~(report_size - 1);
807a85cb24fSFrançois Tigeot
808a85cb24fSFrançois Tigeot /* Move the tail pointer back by the current tail_margin to account for
809a85cb24fSFrançois Tigeot * the possibility that the latest reports may not have really landed
810a85cb24fSFrançois Tigeot * in memory yet...
811a85cb24fSFrançois Tigeot */
812a85cb24fSFrançois Tigeot
813a85cb24fSFrançois Tigeot if (OA_TAKEN(tail, head) < report_size + tail_margin)
814a85cb24fSFrançois Tigeot return -EAGAIN;
815a85cb24fSFrançois Tigeot
816a85cb24fSFrançois Tigeot tail -= tail_margin;
817a85cb24fSFrançois Tigeot tail &= mask;
818a85cb24fSFrançois Tigeot
819a85cb24fSFrançois Tigeot for (/* none */;
820a85cb24fSFrançois Tigeot (taken = OA_TAKEN(tail, head));
821a85cb24fSFrançois Tigeot head = (head + report_size) & mask) {
822a85cb24fSFrançois Tigeot u8 *report = oa_buf_base + head;
823a85cb24fSFrançois Tigeot u32 *report32 = (void *)report;
824a85cb24fSFrançois Tigeot
825a85cb24fSFrançois Tigeot /* All the report sizes factor neatly into the buffer
826a85cb24fSFrançois Tigeot * size so we never expect to see a report split
827a85cb24fSFrançois Tigeot * between the beginning and end of the buffer.
828a85cb24fSFrançois Tigeot *
829a85cb24fSFrançois Tigeot * Given the initial alignment check a misalignment
830a85cb24fSFrançois Tigeot * here would imply a driver bug that would result
831a85cb24fSFrançois Tigeot * in an overrun.
832a85cb24fSFrançois Tigeot */
833a85cb24fSFrançois Tigeot if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
834a85cb24fSFrançois Tigeot DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
835a85cb24fSFrançois Tigeot break;
836a85cb24fSFrançois Tigeot }
837a85cb24fSFrançois Tigeot
838a85cb24fSFrançois Tigeot /* The report-ID field for periodic samples includes
839a85cb24fSFrançois Tigeot * some undocumented flags related to what triggered
840a85cb24fSFrançois Tigeot * the report and is never expected to be zero so we
841a85cb24fSFrançois Tigeot * can check that the report isn't invalid before
842a85cb24fSFrançois Tigeot * copying it to userspace...
843a85cb24fSFrançois Tigeot */
844a85cb24fSFrançois Tigeot if (report32[0] == 0) {
845a85cb24fSFrançois Tigeot DRM_NOTE("Skipping spurious, invalid OA report\n");
846a85cb24fSFrançois Tigeot continue;
847a85cb24fSFrançois Tigeot }
848a85cb24fSFrançois Tigeot
849a85cb24fSFrançois Tigeot ret = append_oa_sample(stream, buf, count, offset, report);
850a85cb24fSFrançois Tigeot if (ret)
851a85cb24fSFrançois Tigeot break;
852a85cb24fSFrançois Tigeot
853a85cb24fSFrançois Tigeot /* The above report-id field sanity check is based on
854a85cb24fSFrançois Tigeot * the assumption that the OA buffer is initially
855a85cb24fSFrançois Tigeot * zeroed and we reset the field after copying so the
856a85cb24fSFrançois Tigeot * check is still meaningful once old reports start
857a85cb24fSFrançois Tigeot * being overwritten.
858a85cb24fSFrançois Tigeot */
859a85cb24fSFrançois Tigeot report32[0] = 0;
860a85cb24fSFrançois Tigeot }
861a85cb24fSFrançois Tigeot
862a85cb24fSFrançois Tigeot *head_ptr = gtt_offset + head;
863a85cb24fSFrançois Tigeot
864a85cb24fSFrançois Tigeot return ret;
865a85cb24fSFrançois Tigeot }
866a85cb24fSFrançois Tigeot
867a85cb24fSFrançois Tigeot /**
868a85cb24fSFrançois Tigeot * gen7_oa_read - copy status records then buffered OA reports
869a85cb24fSFrançois Tigeot * @stream: An i915-perf stream opened for OA metrics
870a85cb24fSFrançois Tigeot * @buf: destination buffer given by userspace
871a85cb24fSFrançois Tigeot * @count: the number of bytes userspace wants to read
872a85cb24fSFrançois Tigeot * @offset: (inout): the current position for writing into @buf
873a85cb24fSFrançois Tigeot *
874a85cb24fSFrançois Tigeot * Checks Gen 7 specific OA unit status registers and if necessary appends
875a85cb24fSFrançois Tigeot * corresponding status records for userspace (such as for a buffer full
876a85cb24fSFrançois Tigeot * condition) and then initiate appending any buffered OA reports.
877a85cb24fSFrançois Tigeot *
878a85cb24fSFrançois Tigeot * Updates @offset according to the number of bytes successfully copied into
879a85cb24fSFrançois Tigeot * the userspace buffer.
880a85cb24fSFrançois Tigeot *
881a85cb24fSFrançois Tigeot * Returns: zero on success or a negative error code
882a85cb24fSFrançois Tigeot */
883a85cb24fSFrançois Tigeot static int gen7_oa_read(struct i915_perf_stream *stream,
884a85cb24fSFrançois Tigeot char __user *buf,
885a85cb24fSFrançois Tigeot size_t count,
886a85cb24fSFrançois Tigeot size_t *offset)
887a85cb24fSFrançois Tigeot {
888a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
889a85cb24fSFrançois Tigeot int report_size = dev_priv->perf.oa.oa_buffer.format_size;
890a85cb24fSFrançois Tigeot u32 oastatus2;
891a85cb24fSFrançois Tigeot u32 oastatus1;
892a85cb24fSFrançois Tigeot u32 head;
893a85cb24fSFrançois Tigeot u32 tail;
894a85cb24fSFrançois Tigeot int ret;
895a85cb24fSFrançois Tigeot
896a85cb24fSFrançois Tigeot if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
897a85cb24fSFrançois Tigeot return -EIO;
898a85cb24fSFrançois Tigeot
899a85cb24fSFrançois Tigeot oastatus2 = I915_READ(GEN7_OASTATUS2);
900a85cb24fSFrançois Tigeot oastatus1 = I915_READ(GEN7_OASTATUS1);
901a85cb24fSFrançois Tigeot
902a85cb24fSFrançois Tigeot head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
903a85cb24fSFrançois Tigeot tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
904a85cb24fSFrançois Tigeot
905a85cb24fSFrançois Tigeot /* XXX: On Haswell we don't have a safe way to clear oastatus1
906a85cb24fSFrançois Tigeot * bits while the OA unit is enabled (while the tail pointer
907a85cb24fSFrançois Tigeot * may be updated asynchronously) so we ignore status bits
908a85cb24fSFrançois Tigeot * that have already been reported to userspace.
909a85cb24fSFrançois Tigeot */
910a85cb24fSFrançois Tigeot oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;
911a85cb24fSFrançois Tigeot
912a85cb24fSFrançois Tigeot /* We treat OABUFFER_OVERFLOW as a significant error:
913a85cb24fSFrançois Tigeot *
914a85cb24fSFrançois Tigeot * - The status can be interpreted to mean that the buffer is
915a85cb24fSFrançois Tigeot * currently full (with a higher precedence than OA_TAKEN()
916a85cb24fSFrançois Tigeot * which will start to report a near-empty buffer after an
917a85cb24fSFrançois Tigeot * overflow) but it's awkward that we can't clear the status
918a85cb24fSFrançois Tigeot * on Haswell, so without a reset we won't be able to catch
919a85cb24fSFrançois Tigeot * the state again.
920a85cb24fSFrançois Tigeot *
921a85cb24fSFrançois Tigeot * - Since it also implies the HW has started overwriting old
922a85cb24fSFrançois Tigeot * reports it may also affect our sanity checks for invalid
923a85cb24fSFrançois Tigeot * reports when copying to userspace that assume new reports
924a85cb24fSFrançois Tigeot * are being written to cleared memory.
925a85cb24fSFrançois Tigeot *
926a85cb24fSFrançois Tigeot * - In the future we may want to introduce a flight recorder
927a85cb24fSFrançois Tigeot * mode where the driver will automatically maintain a safe
928a85cb24fSFrançois Tigeot * guard band between head/tail, avoiding this overflow
929a85cb24fSFrançois Tigeot * condition, but we avoid the added driver complexity for
930a85cb24fSFrançois Tigeot * now.
931a85cb24fSFrançois Tigeot */
932a85cb24fSFrançois Tigeot if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
933a85cb24fSFrançois Tigeot ret = append_oa_status(stream, buf, count, offset,
934a85cb24fSFrançois Tigeot DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
935a85cb24fSFrançois Tigeot if (ret)
936a85cb24fSFrançois Tigeot return ret;
937a85cb24fSFrançois Tigeot
938*3f2dd94aSFrançois Tigeot DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
939*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.period_exponent);
940a85cb24fSFrançois Tigeot
941a85cb24fSFrançois Tigeot dev_priv->perf.oa.ops.oa_disable(dev_priv);
942a85cb24fSFrançois Tigeot dev_priv->perf.oa.ops.oa_enable(dev_priv);
943a85cb24fSFrançois Tigeot
944a85cb24fSFrançois Tigeot oastatus2 = I915_READ(GEN7_OASTATUS2);
945a85cb24fSFrançois Tigeot oastatus1 = I915_READ(GEN7_OASTATUS1);
946a85cb24fSFrançois Tigeot
947a85cb24fSFrançois Tigeot head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
948a85cb24fSFrançois Tigeot tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
949a85cb24fSFrançois Tigeot }
950a85cb24fSFrançois Tigeot
951a85cb24fSFrançois Tigeot if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
952a85cb24fSFrançois Tigeot ret = append_oa_status(stream, buf, count, offset,
953a85cb24fSFrançois Tigeot DRM_I915_PERF_RECORD_OA_REPORT_LOST);
954a85cb24fSFrançois Tigeot if (ret)
955a85cb24fSFrançois Tigeot return ret;
956a85cb24fSFrançois Tigeot dev_priv->perf.oa.gen7_latched_oastatus1 |=
957a85cb24fSFrançois Tigeot GEN7_OASTATUS1_REPORT_LOST;
958a85cb24fSFrançois Tigeot }
959a85cb24fSFrançois Tigeot
960a85cb24fSFrançois Tigeot ret = gen7_append_oa_reports(stream, buf, count, offset,
961a85cb24fSFrançois Tigeot &head, tail);
962a85cb24fSFrançois Tigeot
963a85cb24fSFrançois Tigeot /* All the report sizes are a power of two and the
964a85cb24fSFrançois Tigeot * head should always be incremented by some multiple
965a85cb24fSFrançois Tigeot * of the report size.
966a85cb24fSFrançois Tigeot *
967a85cb24fSFrançois Tigeot * A warning here, but notably if we later read back a
968a85cb24fSFrançois Tigeot * misaligned pointer we will treat that as a bug since
969a85cb24fSFrançois Tigeot * it could lead to a buffer overrun.
970a85cb24fSFrançois Tigeot */
971a85cb24fSFrançois Tigeot WARN_ONCE(head & (report_size - 1),
972a85cb24fSFrançois Tigeot "i915: Writing misaligned OA head pointer");
973a85cb24fSFrançois Tigeot
974a85cb24fSFrançois Tigeot /* Note: we update the head pointer here even if an error
975a85cb24fSFrançois Tigeot * was returned since the error may represent a short read
976a85cb24fSFrançois Tigeot * where some some reports were successfully copied.
977a85cb24fSFrançois Tigeot */
978a85cb24fSFrançois Tigeot I915_WRITE(GEN7_OASTATUS2,
979a85cb24fSFrançois Tigeot ((head & GEN7_OASTATUS2_HEAD_MASK) |
980a85cb24fSFrançois Tigeot OA_MEM_SELECT_GGTT));
981a85cb24fSFrançois Tigeot
982a85cb24fSFrançois Tigeot return ret;
983a85cb24fSFrançois Tigeot }
984a85cb24fSFrançois Tigeot
985a85cb24fSFrançois Tigeot /**
986a85cb24fSFrançois Tigeot * i915_oa_wait_unlocked - handles blocking IO until OA data available
987a85cb24fSFrançois Tigeot * @stream: An i915-perf stream opened for OA metrics
988a85cb24fSFrançois Tigeot *
989a85cb24fSFrançois Tigeot * Called when userspace tries to read() from a blocking stream FD opened
990a85cb24fSFrançois Tigeot * for OA metrics. It waits until the hrtimer callback finds a non-empty
991a85cb24fSFrançois Tigeot * OA buffer and wakes us.
992a85cb24fSFrançois Tigeot *
993a85cb24fSFrançois Tigeot * Note: it's acceptable to have this return with some false positives
994a85cb24fSFrançois Tigeot * since any subsequent read handling will return -EAGAIN if there isn't
995a85cb24fSFrançois Tigeot * really data ready for userspace yet.
996a85cb24fSFrançois Tigeot *
997a85cb24fSFrançois Tigeot * Returns: zero on success or a negative error code
998a85cb24fSFrançois Tigeot */
999a85cb24fSFrançois Tigeot static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
1000a85cb24fSFrançois Tigeot {
1001a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
1002a85cb24fSFrançois Tigeot
1003a85cb24fSFrançois Tigeot /* We would wait indefinitely if periodic sampling is not enabled */
1004a85cb24fSFrançois Tigeot if (!dev_priv->perf.oa.periodic)
1005a85cb24fSFrançois Tigeot return -EIO;
1006a85cb24fSFrançois Tigeot
1007a85cb24fSFrançois Tigeot /* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it
1008a85cb24fSFrançois Tigeot * just performs mmio reads of the OA buffer head + tail pointers and
1009a85cb24fSFrançois Tigeot * it's assumed we're handling some operation that implies the stream
1010a85cb24fSFrançois Tigeot * can't be destroyed until completion (such as a read()) that ensures
1011a85cb24fSFrançois Tigeot * the device + OA buffer can't disappear
1012a85cb24fSFrançois Tigeot */
1013a85cb24fSFrançois Tigeot return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
1014a85cb24fSFrançois Tigeot !dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv));
1015a85cb24fSFrançois Tigeot }
1016a85cb24fSFrançois Tigeot
1017a85cb24fSFrançois Tigeot /**
1018a85cb24fSFrançois Tigeot * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
1019a85cb24fSFrançois Tigeot * @stream: An i915-perf stream opened for OA metrics
1020a85cb24fSFrançois Tigeot * @file: An i915 perf stream file
1021a85cb24fSFrançois Tigeot * @wait: poll() state table
1022a85cb24fSFrançois Tigeot *
1023a85cb24fSFrançois Tigeot * For handling userspace polling on an i915 perf stream opened for OA metrics,
1024a85cb24fSFrançois Tigeot * this starts a poll_wait with the wait queue that our hrtimer callback wakes
1025a85cb24fSFrançois Tigeot * when it sees data ready to read in the circular OA buffer.
1026a85cb24fSFrançois Tigeot */
1027a85cb24fSFrançois Tigeot static void i915_oa_poll_wait(struct i915_perf_stream *stream,
1028a85cb24fSFrançois Tigeot struct file *file,
1029a85cb24fSFrançois Tigeot poll_table *wait)
1030a85cb24fSFrançois Tigeot {
1031a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
1032a85cb24fSFrançois Tigeot
1033a85cb24fSFrançois Tigeot poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
1034a85cb24fSFrançois Tigeot }
1035a85cb24fSFrançois Tigeot
1036a85cb24fSFrançois Tigeot /**
1037a85cb24fSFrançois Tigeot * i915_oa_read - just calls through to &i915_oa_ops->read
1038a85cb24fSFrançois Tigeot * @stream: An i915-perf stream opened for OA metrics
1039a85cb24fSFrançois Tigeot * @buf: destination buffer given by userspace
1040a85cb24fSFrançois Tigeot * @count: the number of bytes userspace wants to read
1041a85cb24fSFrançois Tigeot * @offset: (inout): the current position for writing into @buf
1042a85cb24fSFrançois Tigeot *
1043a85cb24fSFrançois Tigeot * Updates @offset according to the number of bytes successfully copied into
1044a85cb24fSFrançois Tigeot * the userspace buffer.
1045a85cb24fSFrançois Tigeot *
1046a85cb24fSFrançois Tigeot * Returns: zero on success or a negative error code
1047a85cb24fSFrançois Tigeot */
1048a85cb24fSFrançois Tigeot static int i915_oa_read(struct i915_perf_stream *stream,
1049a85cb24fSFrançois Tigeot char __user *buf,
1050a85cb24fSFrançois Tigeot size_t count,
1051a85cb24fSFrançois Tigeot size_t *offset)
1052a85cb24fSFrançois Tigeot {
1053a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
1054a85cb24fSFrançois Tigeot
1055a85cb24fSFrançois Tigeot return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
1056a85cb24fSFrançois Tigeot }
1057a85cb24fSFrançois Tigeot
1058a85cb24fSFrançois Tigeot /**
1059a85cb24fSFrançois Tigeot * oa_get_render_ctx_id - determine and hold ctx hw id
1060a85cb24fSFrançois Tigeot * @stream: An i915-perf stream opened for OA metrics
1061a85cb24fSFrançois Tigeot *
1062a85cb24fSFrançois Tigeot * Determine the render context hw id, and ensure it remains fixed for the
1063a85cb24fSFrançois Tigeot * lifetime of the stream. This ensures that we don't have to worry about
1064a85cb24fSFrançois Tigeot * updating the context ID in OACONTROL on the fly.
1065a85cb24fSFrançois Tigeot *
1066a85cb24fSFrançois Tigeot * Returns: zero on success or a negative error code
1067a85cb24fSFrançois Tigeot */
1068a85cb24fSFrançois Tigeot static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
1069a85cb24fSFrançois Tigeot {
1070a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
1071*3f2dd94aSFrançois Tigeot
1072*3f2dd94aSFrançois Tigeot if (i915_modparams.enable_execlists)
1073*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
1074*3f2dd94aSFrançois Tigeot else {
1075a85cb24fSFrançois Tigeot struct intel_engine_cs *engine = dev_priv->engine[RCS];
1076*3f2dd94aSFrançois Tigeot struct intel_ring *ring;
1077a85cb24fSFrançois Tigeot int ret;
1078a85cb24fSFrançois Tigeot
1079a85cb24fSFrançois Tigeot ret = i915_mutex_lock_interruptible(&dev_priv->drm);
1080a85cb24fSFrançois Tigeot if (ret)
1081a85cb24fSFrançois Tigeot return ret;
1082a85cb24fSFrançois Tigeot
1083*3f2dd94aSFrançois Tigeot /*
1084*3f2dd94aSFrançois Tigeot * As the ID is the gtt offset of the context's vma we
1085*3f2dd94aSFrançois Tigeot * pin the vma to ensure the ID remains fixed.
1086a85cb24fSFrançois Tigeot *
1087a85cb24fSFrançois Tigeot * NB: implied RCS engine...
1088a85cb24fSFrançois Tigeot */
1089*3f2dd94aSFrançois Tigeot ring = engine->context_pin(engine, stream->ctx);
1090*3f2dd94aSFrançois Tigeot mutex_unlock(&dev_priv->drm.struct_mutex);
1091*3f2dd94aSFrançois Tigeot if (IS_ERR(ring))
1092*3f2dd94aSFrançois Tigeot return PTR_ERR(ring);
1093a85cb24fSFrançois Tigeot
1094*3f2dd94aSFrançois Tigeot
1095*3f2dd94aSFrançois Tigeot /*
1096*3f2dd94aSFrançois Tigeot * Explicitly track the ID (instead of calling
1097*3f2dd94aSFrançois Tigeot * i915_ggtt_offset() on the fly) considering the difference
1098*3f2dd94aSFrançois Tigeot * with gen8+ and execlists
1099a85cb24fSFrançois Tigeot */
1100a85cb24fSFrançois Tigeot dev_priv->perf.oa.specific_ctx_id =
1101a85cb24fSFrançois Tigeot i915_ggtt_offset(stream->ctx->engine[engine->id].state);
1102*3f2dd94aSFrançois Tigeot }
1103a85cb24fSFrançois Tigeot
1104*3f2dd94aSFrançois Tigeot return 0;
1105a85cb24fSFrançois Tigeot }
1106a85cb24fSFrançois Tigeot
1107a85cb24fSFrançois Tigeot /**
1108a85cb24fSFrançois Tigeot * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
1109a85cb24fSFrançois Tigeot * @stream: An i915-perf stream opened for OA metrics
1110a85cb24fSFrançois Tigeot *
1111a85cb24fSFrançois Tigeot * In case anything needed doing to ensure the context HW ID would remain valid
1112a85cb24fSFrançois Tigeot * for the lifetime of the stream, then that can be undone here.
1113a85cb24fSFrançois Tigeot */
1114a85cb24fSFrançois Tigeot static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
1115a85cb24fSFrançois Tigeot {
1116a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
1117*3f2dd94aSFrançois Tigeot
1118*3f2dd94aSFrançois Tigeot if (i915_modparams.enable_execlists) {
1119*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
1120*3f2dd94aSFrançois Tigeot } else {
1121a85cb24fSFrançois Tigeot struct intel_engine_cs *engine = dev_priv->engine[RCS];
1122a85cb24fSFrançois Tigeot
1123a85cb24fSFrançois Tigeot mutex_lock(&dev_priv->drm.struct_mutex);
1124a85cb24fSFrançois Tigeot
1125a85cb24fSFrançois Tigeot dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
1126a85cb24fSFrançois Tigeot engine->context_unpin(engine, stream->ctx);
1127a85cb24fSFrançois Tigeot
1128a85cb24fSFrançois Tigeot mutex_unlock(&dev_priv->drm.struct_mutex);
1129a85cb24fSFrançois Tigeot }
1130*3f2dd94aSFrançois Tigeot }
1131a85cb24fSFrançois Tigeot
1132a85cb24fSFrançois Tigeot static void
1133a85cb24fSFrançois Tigeot free_oa_buffer(struct drm_i915_private *i915)
1134a85cb24fSFrançois Tigeot {
1135a85cb24fSFrançois Tigeot mutex_lock(&i915->drm.struct_mutex);
1136a85cb24fSFrançois Tigeot
1137a85cb24fSFrançois Tigeot i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj);
1138a85cb24fSFrançois Tigeot i915_vma_unpin(i915->perf.oa.oa_buffer.vma);
1139a85cb24fSFrançois Tigeot i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj);
1140a85cb24fSFrançois Tigeot
1141a85cb24fSFrançois Tigeot i915->perf.oa.oa_buffer.vma = NULL;
1142a85cb24fSFrançois Tigeot i915->perf.oa.oa_buffer.vaddr = NULL;
1143a85cb24fSFrançois Tigeot
1144a85cb24fSFrançois Tigeot mutex_unlock(&i915->drm.struct_mutex);
1145a85cb24fSFrançois Tigeot }
1146a85cb24fSFrançois Tigeot
1147a85cb24fSFrançois Tigeot static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
1148a85cb24fSFrançois Tigeot {
1149a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
1150a85cb24fSFrançois Tigeot
1151a85cb24fSFrançois Tigeot BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);
1152a85cb24fSFrançois Tigeot
1153*3f2dd94aSFrançois Tigeot /*
1154*3f2dd94aSFrançois Tigeot * Unset exclusive_stream first, it will be checked while disabling
1155*3f2dd94aSFrançois Tigeot * the metric set on gen8+.
1156*3f2dd94aSFrançois Tigeot */
1157*3f2dd94aSFrançois Tigeot mutex_lock(&dev_priv->drm.struct_mutex);
1158*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.exclusive_stream = NULL;
1159a85cb24fSFrançois Tigeot dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
1160*3f2dd94aSFrançois Tigeot mutex_unlock(&dev_priv->drm.struct_mutex);
1161a85cb24fSFrançois Tigeot
1162a85cb24fSFrançois Tigeot free_oa_buffer(dev_priv);
1163a85cb24fSFrançois Tigeot
1164a85cb24fSFrançois Tigeot intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
1165a85cb24fSFrançois Tigeot intel_runtime_pm_put(dev_priv);
1166a85cb24fSFrançois Tigeot
1167a85cb24fSFrançois Tigeot if (stream->ctx)
1168a85cb24fSFrançois Tigeot oa_put_render_ctx_id(stream);
1169a85cb24fSFrançois Tigeot
1170a85cb24fSFrançois Tigeot dev_priv->perf.oa.exclusive_stream = NULL;
1171a85cb24fSFrançois Tigeot }
1172a85cb24fSFrançois Tigeot
1173a85cb24fSFrançois Tigeot static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
1174a85cb24fSFrançois Tigeot {
1175a85cb24fSFrançois Tigeot u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
1176a85cb24fSFrançois Tigeot
1177a85cb24fSFrançois Tigeot /* Pre-DevBDW: OABUFFER must be set with counters off,
1178a85cb24fSFrançois Tigeot * before OASTATUS1, but after OASTATUS2
1179a85cb24fSFrançois Tigeot */
1180a85cb24fSFrançois Tigeot I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */
1181a85cb24fSFrançois Tigeot I915_WRITE(GEN7_OABUFFER, gtt_offset);
1182a85cb24fSFrançois Tigeot I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
1183a85cb24fSFrançois Tigeot
1184a85cb24fSFrançois Tigeot /* On Haswell we have to track which OASTATUS1 flags we've
1185a85cb24fSFrançois Tigeot * already seen since they can't be cleared while periodic
1186a85cb24fSFrançois Tigeot * sampling is enabled.
1187a85cb24fSFrançois Tigeot */
1188a85cb24fSFrançois Tigeot dev_priv->perf.oa.gen7_latched_oastatus1 = 0;
1189a85cb24fSFrançois Tigeot
1190a85cb24fSFrançois Tigeot /* NB: although the OA buffer will initially be allocated
1191a85cb24fSFrançois Tigeot * zeroed via shmfs (and so this memset is redundant when
1192a85cb24fSFrançois Tigeot * first allocating), we may re-init the OA buffer, either
1193a85cb24fSFrançois Tigeot * when re-enabling a stream or in error/reset paths.
1194a85cb24fSFrançois Tigeot *
1195a85cb24fSFrançois Tigeot * The reason we clear the buffer for each re-init is for the
1196a85cb24fSFrançois Tigeot * sanity check in gen7_append_oa_reports() that looks at the
1197a85cb24fSFrançois Tigeot * report-id field to make sure it's non-zero which relies on
1198a85cb24fSFrançois Tigeot * the assumption that new reports are being written to zeroed
1199a85cb24fSFrançois Tigeot * memory...
1200a85cb24fSFrançois Tigeot */
1201a85cb24fSFrançois Tigeot memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
1202a85cb24fSFrançois Tigeot
1203a85cb24fSFrançois Tigeot /* Maybe make ->pollin per-stream state if we support multiple
1204a85cb24fSFrançois Tigeot * concurrent streams in the future.
1205a85cb24fSFrançois Tigeot */
1206a85cb24fSFrançois Tigeot dev_priv->perf.oa.pollin = false;
1207a85cb24fSFrançois Tigeot }
1208a85cb24fSFrançois Tigeot
1209*3f2dd94aSFrançois Tigeot static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
1210*3f2dd94aSFrançois Tigeot {
1211*3f2dd94aSFrançois Tigeot u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
1212*3f2dd94aSFrançois Tigeot unsigned long flags;
1213*3f2dd94aSFrançois Tigeot
1214*3f2dd94aSFrançois Tigeot spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
1215*3f2dd94aSFrançois Tigeot
1216*3f2dd94aSFrançois Tigeot I915_WRITE(GEN8_OASTATUS, 0);
1217*3f2dd94aSFrançois Tigeot I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
1218*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.oa_buffer.head = gtt_offset;
1219*3f2dd94aSFrançois Tigeot
1220*3f2dd94aSFrançois Tigeot I915_WRITE(GEN8_OABUFFER_UDW, 0);
1221*3f2dd94aSFrançois Tigeot
1222*3f2dd94aSFrançois Tigeot /*
1223*3f2dd94aSFrançois Tigeot * PRM says:
1224*3f2dd94aSFrançois Tigeot *
1225*3f2dd94aSFrançois Tigeot * "This MMIO must be set before the OATAILPTR
1226*3f2dd94aSFrançois Tigeot * register and after the OAHEADPTR register. This is
1227*3f2dd94aSFrançois Tigeot * to enable proper functionality of the overflow
1228*3f2dd94aSFrançois Tigeot * bit."
1229*3f2dd94aSFrançois Tigeot */
1230*3f2dd94aSFrançois Tigeot I915_WRITE(GEN8_OABUFFER, gtt_offset |
1231*3f2dd94aSFrançois Tigeot OABUFFER_SIZE_16M | OA_MEM_SELECT_GGTT);
1232*3f2dd94aSFrançois Tigeot I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
1233*3f2dd94aSFrançois Tigeot
1234*3f2dd94aSFrançois Tigeot /* Mark that we need updated tail pointers to read from... */
1235*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
1236*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
1237*3f2dd94aSFrançois Tigeot
1238*3f2dd94aSFrançois Tigeot /*
1239*3f2dd94aSFrançois Tigeot * Reset state used to recognise context switches, affecting which
1240*3f2dd94aSFrançois Tigeot * reports we will forward to userspace while filtering for a single
1241*3f2dd94aSFrançois Tigeot * context.
1242*3f2dd94aSFrançois Tigeot */
1243*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;
1244*3f2dd94aSFrançois Tigeot
1245*3f2dd94aSFrançois Tigeot spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
1246*3f2dd94aSFrançois Tigeot
1247*3f2dd94aSFrançois Tigeot /*
1248*3f2dd94aSFrançois Tigeot * NB: although the OA buffer will initially be allocated
1249*3f2dd94aSFrançois Tigeot * zeroed via shmfs (and so this memset is redundant when
1250*3f2dd94aSFrançois Tigeot * first allocating), we may re-init the OA buffer, either
1251*3f2dd94aSFrançois Tigeot * when re-enabling a stream or in error/reset paths.
1252*3f2dd94aSFrançois Tigeot *
1253*3f2dd94aSFrançois Tigeot * The reason we clear the buffer for each re-init is for the
1254*3f2dd94aSFrançois Tigeot * sanity check in gen8_append_oa_reports() that looks at the
1255*3f2dd94aSFrançois Tigeot * reason field to make sure it's non-zero which relies on
1256*3f2dd94aSFrançois Tigeot * the assumption that new reports are being written to zeroed
1257*3f2dd94aSFrançois Tigeot * memory...
1258*3f2dd94aSFrançois Tigeot */
1259*3f2dd94aSFrançois Tigeot memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
1260*3f2dd94aSFrançois Tigeot
1261*3f2dd94aSFrançois Tigeot /*
1262*3f2dd94aSFrançois Tigeot * Maybe make ->pollin per-stream state if we support multiple
1263*3f2dd94aSFrançois Tigeot * concurrent streams in the future.
1264*3f2dd94aSFrançois Tigeot */
1265*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.pollin = false;
1266*3f2dd94aSFrançois Tigeot }
1267*3f2dd94aSFrançois Tigeot
1268a85cb24fSFrançois Tigeot static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
1269a85cb24fSFrançois Tigeot {
1270a85cb24fSFrançois Tigeot struct drm_i915_gem_object *bo;
1271a85cb24fSFrançois Tigeot struct i915_vma *vma;
1272a85cb24fSFrançois Tigeot int ret;
1273a85cb24fSFrançois Tigeot
1274a85cb24fSFrançois Tigeot if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
1275a85cb24fSFrançois Tigeot return -ENODEV;
1276a85cb24fSFrançois Tigeot
1277a85cb24fSFrançois Tigeot ret = i915_mutex_lock_interruptible(&dev_priv->drm);
1278a85cb24fSFrançois Tigeot if (ret)
1279a85cb24fSFrançois Tigeot return ret;
1280a85cb24fSFrançois Tigeot
1281a85cb24fSFrançois Tigeot BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
1282a85cb24fSFrançois Tigeot BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
1283a85cb24fSFrançois Tigeot
1284a85cb24fSFrançois Tigeot bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE);
1285a85cb24fSFrançois Tigeot if (IS_ERR(bo)) {
1286a85cb24fSFrançois Tigeot DRM_ERROR("Failed to allocate OA buffer\n");
1287a85cb24fSFrançois Tigeot ret = PTR_ERR(bo);
1288a85cb24fSFrançois Tigeot goto unlock;
1289a85cb24fSFrançois Tigeot }
1290a85cb24fSFrançois Tigeot
1291a85cb24fSFrançois Tigeot ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
1292a85cb24fSFrançois Tigeot if (ret)
1293a85cb24fSFrançois Tigeot goto err_unref;
1294a85cb24fSFrançois Tigeot
1295a85cb24fSFrançois Tigeot /* PreHSW required 512K alignment, HSW requires 16M */
1296a85cb24fSFrançois Tigeot vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
1297a85cb24fSFrançois Tigeot if (IS_ERR(vma)) {
1298a85cb24fSFrançois Tigeot ret = PTR_ERR(vma);
1299a85cb24fSFrançois Tigeot goto err_unref;
1300a85cb24fSFrançois Tigeot }
1301a85cb24fSFrançois Tigeot dev_priv->perf.oa.oa_buffer.vma = vma;
1302a85cb24fSFrançois Tigeot
1303a85cb24fSFrançois Tigeot dev_priv->perf.oa.oa_buffer.vaddr =
1304a85cb24fSFrançois Tigeot i915_gem_object_pin_map(bo, I915_MAP_WB);
1305a85cb24fSFrançois Tigeot if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
1306a85cb24fSFrançois Tigeot ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
1307a85cb24fSFrançois Tigeot goto err_unpin;
1308a85cb24fSFrançois Tigeot }
1309a85cb24fSFrançois Tigeot
1310a85cb24fSFrançois Tigeot dev_priv->perf.oa.ops.init_oa_buffer(dev_priv);
1311a85cb24fSFrançois Tigeot
1312a85cb24fSFrançois Tigeot DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
1313a85cb24fSFrançois Tigeot i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
1314a85cb24fSFrançois Tigeot dev_priv->perf.oa.oa_buffer.vaddr);
1315a85cb24fSFrançois Tigeot
1316a85cb24fSFrançois Tigeot goto unlock;
1317a85cb24fSFrançois Tigeot
1318a85cb24fSFrançois Tigeot err_unpin:
1319a85cb24fSFrançois Tigeot __i915_vma_unpin(vma);
1320a85cb24fSFrançois Tigeot
1321a85cb24fSFrançois Tigeot err_unref:
1322a85cb24fSFrançois Tigeot i915_gem_object_put(bo);
1323a85cb24fSFrançois Tigeot
1324a85cb24fSFrançois Tigeot dev_priv->perf.oa.oa_buffer.vaddr = NULL;
1325a85cb24fSFrançois Tigeot dev_priv->perf.oa.oa_buffer.vma = NULL;
1326a85cb24fSFrançois Tigeot
1327a85cb24fSFrançois Tigeot unlock:
1328a85cb24fSFrançois Tigeot mutex_unlock(&dev_priv->drm.struct_mutex);
1329a85cb24fSFrançois Tigeot return ret;
1330a85cb24fSFrançois Tigeot }
1331a85cb24fSFrançois Tigeot
1332a85cb24fSFrançois Tigeot static void config_oa_regs(struct drm_i915_private *dev_priv,
1333a85cb24fSFrançois Tigeot const struct i915_oa_reg *regs,
1334*3f2dd94aSFrançois Tigeot u32 n_regs)
1335a85cb24fSFrançois Tigeot {
1336*3f2dd94aSFrançois Tigeot u32 i;
1337a85cb24fSFrançois Tigeot
1338a85cb24fSFrançois Tigeot for (i = 0; i < n_regs; i++) {
1339a85cb24fSFrançois Tigeot const struct i915_oa_reg *reg = regs + i;
1340a85cb24fSFrançois Tigeot
1341a85cb24fSFrançois Tigeot I915_WRITE(reg->addr, reg->value);
1342a85cb24fSFrançois Tigeot }
1343a85cb24fSFrançois Tigeot }
1344a85cb24fSFrançois Tigeot
1345*3f2dd94aSFrançois Tigeot static int hsw_enable_metric_set(struct drm_i915_private *dev_priv,
1346*3f2dd94aSFrançois Tigeot const struct i915_oa_config *oa_config)
1347a85cb24fSFrançois Tigeot {
1348a85cb24fSFrançois Tigeot /* PRM:
1349a85cb24fSFrançois Tigeot *
1350a85cb24fSFrançois Tigeot * OA unit is using “crclk” for its functionality. When trunk
1351a85cb24fSFrançois Tigeot * level clock gating takes place, OA clock would be gated,
1352a85cb24fSFrançois Tigeot * unable to count the events from non-render clock domain.
1353a85cb24fSFrançois Tigeot * Render clock gating must be disabled when OA is enabled to
1354a85cb24fSFrançois Tigeot * count the events from non-render domain. Unit level clock
1355a85cb24fSFrançois Tigeot * gating for RCS should also be disabled.
1356a85cb24fSFrançois Tigeot */
1357a85cb24fSFrançois Tigeot I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1358a85cb24fSFrançois Tigeot ~GEN7_DOP_CLOCK_GATE_ENABLE));
1359a85cb24fSFrançois Tigeot I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
1360a85cb24fSFrançois Tigeot GEN6_CSUNIT_CLOCK_GATE_DISABLE));
1361a85cb24fSFrançois Tigeot
1362*3f2dd94aSFrançois Tigeot config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
1363a85cb24fSFrançois Tigeot
1364a85cb24fSFrançois Tigeot /* It apparently takes a fairly long time for a new MUX
1365a85cb24fSFrançois Tigeot * configuration to be be applied after these register writes.
1366a85cb24fSFrançois Tigeot * This delay duration was derived empirically based on the
1367a85cb24fSFrançois Tigeot * render_basic config but hopefully it covers the maximum
1368a85cb24fSFrançois Tigeot * configuration latency.
1369a85cb24fSFrançois Tigeot *
1370a85cb24fSFrançois Tigeot * As a fallback, the checks in _append_oa_reports() to skip
1371a85cb24fSFrançois Tigeot * invalid OA reports do also seem to work to discard reports
1372a85cb24fSFrançois Tigeot * generated before this config has completed - albeit not
1373a85cb24fSFrançois Tigeot * silently.
1374a85cb24fSFrançois Tigeot *
1375a85cb24fSFrançois Tigeot * Unfortunately this is essentially a magic number, since we
1376a85cb24fSFrançois Tigeot * don't currently know of a reliable mechanism for predicting
1377a85cb24fSFrançois Tigeot * how long the MUX config will take to apply and besides
1378a85cb24fSFrançois Tigeot * seeing invalid reports we don't know of a reliable way to
1379a85cb24fSFrançois Tigeot * explicitly check that the MUX config has landed.
1380a85cb24fSFrançois Tigeot *
1381a85cb24fSFrançois Tigeot * It's even possible we've miss characterized the underlying
1382a85cb24fSFrançois Tigeot * problem - it just seems like the simplest explanation why
1383a85cb24fSFrançois Tigeot * a delay at this location would mitigate any invalid reports.
1384a85cb24fSFrançois Tigeot */
1385a85cb24fSFrançois Tigeot usleep_range(15000, 20000);
1386a85cb24fSFrançois Tigeot
1387*3f2dd94aSFrançois Tigeot config_oa_regs(dev_priv, oa_config->b_counter_regs,
1388*3f2dd94aSFrançois Tigeot oa_config->b_counter_regs_len);
1389a85cb24fSFrançois Tigeot
1390a85cb24fSFrançois Tigeot return 0;
1391a85cb24fSFrançois Tigeot }
1392a85cb24fSFrançois Tigeot
1393a85cb24fSFrançois Tigeot static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
1394a85cb24fSFrançois Tigeot {
1395a85cb24fSFrançois Tigeot I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
1396a85cb24fSFrançois Tigeot ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
1397a85cb24fSFrançois Tigeot I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
1398a85cb24fSFrançois Tigeot GEN7_DOP_CLOCK_GATE_ENABLE));
1399a85cb24fSFrançois Tigeot
1400a85cb24fSFrançois Tigeot I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
1401a85cb24fSFrançois Tigeot ~GT_NOA_ENABLE));
1402a85cb24fSFrançois Tigeot }
1403a85cb24fSFrançois Tigeot
1404*3f2dd94aSFrançois Tigeot /*
1405*3f2dd94aSFrançois Tigeot * NB: It must always remain pointer safe to run this even if the OA unit
1406*3f2dd94aSFrançois Tigeot * has been disabled.
1407*3f2dd94aSFrançois Tigeot *
1408*3f2dd94aSFrançois Tigeot * It's fine to put out-of-date values into these per-context registers
1409*3f2dd94aSFrançois Tigeot * in the case that the OA unit has been disabled.
1410*3f2dd94aSFrançois Tigeot */
1411*3f2dd94aSFrançois Tigeot static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
1412*3f2dd94aSFrançois Tigeot u32 *reg_state,
1413*3f2dd94aSFrançois Tigeot const struct i915_oa_config *oa_config)
1414a85cb24fSFrançois Tigeot {
1415*3f2dd94aSFrançois Tigeot struct drm_i915_private *dev_priv = ctx->i915;
1416*3f2dd94aSFrançois Tigeot u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
1417*3f2dd94aSFrançois Tigeot u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
1418*3f2dd94aSFrançois Tigeot /* The MMIO offsets for Flex EU registers aren't contiguous */
1419*3f2dd94aSFrançois Tigeot u32 flex_mmio[] = {
1420*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL0),
1421*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL1),
1422*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL2),
1423*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL3),
1424*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL4),
1425*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL5),
1426*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL6),
1427*3f2dd94aSFrançois Tigeot };
1428*3f2dd94aSFrançois Tigeot int i;
1429*3f2dd94aSFrançois Tigeot
1430*3f2dd94aSFrançois Tigeot reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
1431*3f2dd94aSFrançois Tigeot reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent <<
1432*3f2dd94aSFrançois Tigeot GEN8_OA_TIMER_PERIOD_SHIFT) |
1433*3f2dd94aSFrançois Tigeot (dev_priv->perf.oa.periodic ?
1434*3f2dd94aSFrançois Tigeot GEN8_OA_TIMER_ENABLE : 0) |
1435*3f2dd94aSFrançois Tigeot GEN8_OA_COUNTER_RESUME;
1436*3f2dd94aSFrançois Tigeot
1437*3f2dd94aSFrançois Tigeot for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) {
1438*3f2dd94aSFrançois Tigeot u32 state_offset = ctx_flexeu0 + i * 2;
1439*3f2dd94aSFrançois Tigeot u32 mmio = flex_mmio[i];
1440*3f2dd94aSFrançois Tigeot
1441*3f2dd94aSFrançois Tigeot /*
1442*3f2dd94aSFrançois Tigeot * This arbitrary default will select the 'EU FPU0 Pipeline
1443*3f2dd94aSFrançois Tigeot * Active' event. In the future it's anticipated that there
1444*3f2dd94aSFrançois Tigeot * will be an explicit 'No Event' we can select, but not yet...
1445*3f2dd94aSFrançois Tigeot */
1446*3f2dd94aSFrançois Tigeot u32 value = 0;
1447*3f2dd94aSFrançois Tigeot
1448*3f2dd94aSFrançois Tigeot if (oa_config) {
1449*3f2dd94aSFrançois Tigeot u32 j;
1450*3f2dd94aSFrançois Tigeot
1451*3f2dd94aSFrançois Tigeot for (j = 0; j < oa_config->flex_regs_len; j++) {
1452*3f2dd94aSFrançois Tigeot if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
1453*3f2dd94aSFrançois Tigeot value = oa_config->flex_regs[j].value;
1454*3f2dd94aSFrançois Tigeot break;
1455*3f2dd94aSFrançois Tigeot }
1456*3f2dd94aSFrançois Tigeot }
1457*3f2dd94aSFrançois Tigeot }
1458*3f2dd94aSFrançois Tigeot
1459*3f2dd94aSFrançois Tigeot reg_state[state_offset] = mmio;
1460*3f2dd94aSFrançois Tigeot reg_state[state_offset+1] = value;
1461*3f2dd94aSFrançois Tigeot }
1462*3f2dd94aSFrançois Tigeot }
1463*3f2dd94aSFrançois Tigeot
1464*3f2dd94aSFrançois Tigeot /*
1465*3f2dd94aSFrançois Tigeot * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This
1466*3f2dd94aSFrançois Tigeot * is only used by the kernel context.
1467*3f2dd94aSFrançois Tigeot */
1468*3f2dd94aSFrançois Tigeot static int gen8_emit_oa_config(struct drm_i915_gem_request *req,
1469*3f2dd94aSFrançois Tigeot const struct i915_oa_config *oa_config)
1470*3f2dd94aSFrançois Tigeot {
1471*3f2dd94aSFrançois Tigeot struct drm_i915_private *dev_priv = req->i915;
1472*3f2dd94aSFrançois Tigeot /* The MMIO offsets for Flex EU registers aren't contiguous */
1473*3f2dd94aSFrançois Tigeot u32 flex_mmio[] = {
1474*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL0),
1475*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL1),
1476*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL2),
1477*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL3),
1478*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL4),
1479*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL5),
1480*3f2dd94aSFrançois Tigeot i915_mmio_reg_offset(EU_PERF_CNTL6),
1481*3f2dd94aSFrançois Tigeot };
1482*3f2dd94aSFrançois Tigeot u32 *cs;
1483*3f2dd94aSFrançois Tigeot int i;
1484*3f2dd94aSFrançois Tigeot
1485*3f2dd94aSFrançois Tigeot cs = intel_ring_begin(req, ARRAY_SIZE(flex_mmio) * 2 + 4);
1486*3f2dd94aSFrançois Tigeot if (IS_ERR(cs))
1487*3f2dd94aSFrançois Tigeot return PTR_ERR(cs);
1488*3f2dd94aSFrançois Tigeot
1489*3f2dd94aSFrançois Tigeot *cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1);
1490*3f2dd94aSFrançois Tigeot
1491*3f2dd94aSFrançois Tigeot *cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
1492*3f2dd94aSFrançois Tigeot *cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
1493*3f2dd94aSFrançois Tigeot (dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
1494*3f2dd94aSFrançois Tigeot GEN8_OA_COUNTER_RESUME;
1495*3f2dd94aSFrançois Tigeot
1496*3f2dd94aSFrançois Tigeot for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) {
1497*3f2dd94aSFrançois Tigeot u32 mmio = flex_mmio[i];
1498*3f2dd94aSFrançois Tigeot
1499*3f2dd94aSFrançois Tigeot /*
1500*3f2dd94aSFrançois Tigeot * This arbitrary default will select the 'EU FPU0 Pipeline
1501*3f2dd94aSFrançois Tigeot * Active' event. In the future it's anticipated that there
1502*3f2dd94aSFrançois Tigeot * will be an explicit 'No Event' we can select, but not
1503*3f2dd94aSFrançois Tigeot * yet...
1504*3f2dd94aSFrançois Tigeot */
1505*3f2dd94aSFrançois Tigeot u32 value = 0;
1506*3f2dd94aSFrançois Tigeot
1507*3f2dd94aSFrançois Tigeot if (oa_config) {
1508*3f2dd94aSFrançois Tigeot u32 j;
1509*3f2dd94aSFrançois Tigeot
1510*3f2dd94aSFrançois Tigeot for (j = 0; j < oa_config->flex_regs_len; j++) {
1511*3f2dd94aSFrançois Tigeot if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
1512*3f2dd94aSFrançois Tigeot value = oa_config->flex_regs[j].value;
1513*3f2dd94aSFrançois Tigeot break;
1514*3f2dd94aSFrançois Tigeot }
1515*3f2dd94aSFrançois Tigeot }
1516*3f2dd94aSFrançois Tigeot }
1517*3f2dd94aSFrançois Tigeot
1518*3f2dd94aSFrançois Tigeot *cs++ = mmio;
1519*3f2dd94aSFrançois Tigeot *cs++ = value;
1520*3f2dd94aSFrançois Tigeot }
1521*3f2dd94aSFrançois Tigeot
1522*3f2dd94aSFrançois Tigeot *cs++ = MI_NOOP;
1523*3f2dd94aSFrançois Tigeot intel_ring_advance(req, cs);
1524*3f2dd94aSFrançois Tigeot
1525*3f2dd94aSFrançois Tigeot return 0;
1526*3f2dd94aSFrançois Tigeot }
1527*3f2dd94aSFrançois Tigeot
1528*3f2dd94aSFrançois Tigeot static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv,
1529*3f2dd94aSFrançois Tigeot const struct i915_oa_config *oa_config)
1530*3f2dd94aSFrançois Tigeot {
1531*3f2dd94aSFrançois Tigeot struct intel_engine_cs *engine = dev_priv->engine[RCS];
1532*3f2dd94aSFrançois Tigeot struct i915_gem_timeline *timeline;
1533*3f2dd94aSFrançois Tigeot struct drm_i915_gem_request *req;
1534*3f2dd94aSFrançois Tigeot int ret;
1535*3f2dd94aSFrançois Tigeot
1536*3f2dd94aSFrançois Tigeot lockdep_assert_held(&dev_priv->drm.struct_mutex);
1537*3f2dd94aSFrançois Tigeot
1538*3f2dd94aSFrançois Tigeot i915_gem_retire_requests(dev_priv);
1539*3f2dd94aSFrançois Tigeot
1540*3f2dd94aSFrançois Tigeot req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
1541*3f2dd94aSFrançois Tigeot if (IS_ERR(req))
1542*3f2dd94aSFrançois Tigeot return PTR_ERR(req);
1543*3f2dd94aSFrançois Tigeot
1544*3f2dd94aSFrançois Tigeot ret = gen8_emit_oa_config(req, oa_config);
1545*3f2dd94aSFrançois Tigeot if (ret) {
1546*3f2dd94aSFrançois Tigeot i915_add_request(req);
1547*3f2dd94aSFrançois Tigeot return ret;
1548*3f2dd94aSFrançois Tigeot }
1549*3f2dd94aSFrançois Tigeot
1550*3f2dd94aSFrançois Tigeot /* Queue this switch after all other activity */
1551*3f2dd94aSFrançois Tigeot list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
1552*3f2dd94aSFrançois Tigeot struct drm_i915_gem_request *prev;
1553*3f2dd94aSFrançois Tigeot struct intel_timeline *tl;
1554*3f2dd94aSFrançois Tigeot
1555*3f2dd94aSFrançois Tigeot tl = &timeline->engine[engine->id];
1556*3f2dd94aSFrançois Tigeot prev = i915_gem_active_raw(&tl->last_request,
1557*3f2dd94aSFrançois Tigeot &dev_priv->drm.struct_mutex);
1558*3f2dd94aSFrançois Tigeot if (prev)
1559*3f2dd94aSFrançois Tigeot i915_sw_fence_await_sw_fence_gfp(&req->submit,
1560*3f2dd94aSFrançois Tigeot &prev->submit,
1561*3f2dd94aSFrançois Tigeot GFP_KERNEL);
1562*3f2dd94aSFrançois Tigeot }
1563*3f2dd94aSFrançois Tigeot
1564*3f2dd94aSFrançois Tigeot ret = i915_switch_context(req);
1565*3f2dd94aSFrançois Tigeot i915_add_request(req);
1566*3f2dd94aSFrançois Tigeot
1567*3f2dd94aSFrançois Tigeot return ret;
1568*3f2dd94aSFrançois Tigeot }
1569*3f2dd94aSFrançois Tigeot
1570*3f2dd94aSFrançois Tigeot /*
1571*3f2dd94aSFrançois Tigeot * Manages updating the per-context aspects of the OA stream
1572*3f2dd94aSFrançois Tigeot * configuration across all contexts.
1573*3f2dd94aSFrançois Tigeot *
1574*3f2dd94aSFrançois Tigeot * The awkward consideration here is that OACTXCONTROL controls the
1575*3f2dd94aSFrançois Tigeot * exponent for periodic sampling which is primarily used for system
1576*3f2dd94aSFrançois Tigeot * wide profiling where we'd like a consistent sampling period even in
1577*3f2dd94aSFrançois Tigeot * the face of context switches.
1578*3f2dd94aSFrançois Tigeot *
1579*3f2dd94aSFrançois Tigeot * Our approach of updating the register state context (as opposed to
1580*3f2dd94aSFrançois Tigeot * say using a workaround batch buffer) ensures that the hardware
1581*3f2dd94aSFrançois Tigeot * won't automatically reload an out-of-date timer exponent even
1582*3f2dd94aSFrançois Tigeot * transiently before a WA BB could be parsed.
1583*3f2dd94aSFrançois Tigeot *
1584*3f2dd94aSFrançois Tigeot * This function needs to:
1585*3f2dd94aSFrançois Tigeot * - Ensure the currently running context's per-context OA state is
1586*3f2dd94aSFrançois Tigeot * updated
1587*3f2dd94aSFrançois Tigeot * - Ensure that all existing contexts will have the correct per-context
1588*3f2dd94aSFrançois Tigeot * OA state if they are scheduled for use.
1589*3f2dd94aSFrançois Tigeot * - Ensure any new contexts will be initialized with the correct
1590*3f2dd94aSFrançois Tigeot * per-context OA state.
1591*3f2dd94aSFrançois Tigeot *
1592*3f2dd94aSFrançois Tigeot * Note: it's only the RCS/Render context that has any OA state.
1593*3f2dd94aSFrançois Tigeot */
1594*3f2dd94aSFrançois Tigeot static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
1595*3f2dd94aSFrançois Tigeot const struct i915_oa_config *oa_config)
1596*3f2dd94aSFrançois Tigeot {
1597*3f2dd94aSFrançois Tigeot struct i915_gem_context *ctx;
1598*3f2dd94aSFrançois Tigeot int ret;
1599*3f2dd94aSFrançois Tigeot unsigned int wait_flags = I915_WAIT_LOCKED;
1600*3f2dd94aSFrançois Tigeot
1601*3f2dd94aSFrançois Tigeot lockdep_assert_held(&dev_priv->drm.struct_mutex);
1602*3f2dd94aSFrançois Tigeot
1603*3f2dd94aSFrançois Tigeot /* Switch away from any user context. */
1604*3f2dd94aSFrançois Tigeot ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
1605*3f2dd94aSFrançois Tigeot if (ret)
1606*3f2dd94aSFrançois Tigeot goto out;
1607*3f2dd94aSFrançois Tigeot
1608*3f2dd94aSFrançois Tigeot /*
1609*3f2dd94aSFrançois Tigeot * The OA register config is setup through the context image. This image
1610*3f2dd94aSFrançois Tigeot * might be written to by the GPU on context switch (in particular on
1611*3f2dd94aSFrançois Tigeot * lite-restore). This means we can't safely update a context's image,
1612*3f2dd94aSFrançois Tigeot * if this context is scheduled/submitted to run on the GPU.
1613*3f2dd94aSFrançois Tigeot *
1614*3f2dd94aSFrançois Tigeot * We could emit the OA register config through the batch buffer but
1615*3f2dd94aSFrançois Tigeot * this might leave small interval of time where the OA unit is
1616*3f2dd94aSFrançois Tigeot * configured at an invalid sampling period.
1617*3f2dd94aSFrançois Tigeot *
1618*3f2dd94aSFrançois Tigeot * So far the best way to work around this issue seems to be draining
1619*3f2dd94aSFrançois Tigeot * the GPU from any submitted work.
1620*3f2dd94aSFrançois Tigeot */
1621*3f2dd94aSFrançois Tigeot ret = i915_gem_wait_for_idle(dev_priv, wait_flags);
1622*3f2dd94aSFrançois Tigeot if (ret)
1623*3f2dd94aSFrançois Tigeot goto out;
1624*3f2dd94aSFrançois Tigeot
1625*3f2dd94aSFrançois Tigeot /* Update all contexts now that we've stalled the submission. */
1626*3f2dd94aSFrançois Tigeot list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
1627*3f2dd94aSFrançois Tigeot struct intel_context *ce = &ctx->engine[RCS];
1628*3f2dd94aSFrançois Tigeot u32 *regs;
1629*3f2dd94aSFrançois Tigeot
1630*3f2dd94aSFrançois Tigeot /* OA settings will be set upon first use */
1631*3f2dd94aSFrançois Tigeot if (!ce->state)
1632*3f2dd94aSFrançois Tigeot continue;
1633*3f2dd94aSFrançois Tigeot
1634*3f2dd94aSFrançois Tigeot regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
1635*3f2dd94aSFrançois Tigeot if (IS_ERR(regs)) {
1636*3f2dd94aSFrançois Tigeot ret = PTR_ERR(regs);
1637*3f2dd94aSFrançois Tigeot goto out;
1638*3f2dd94aSFrançois Tigeot }
1639*3f2dd94aSFrançois Tigeot
1640*3f2dd94aSFrançois Tigeot ce->state->obj->mm.dirty = true;
1641*3f2dd94aSFrançois Tigeot regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
1642*3f2dd94aSFrançois Tigeot
1643*3f2dd94aSFrançois Tigeot gen8_update_reg_state_unlocked(ctx, regs, oa_config);
1644*3f2dd94aSFrançois Tigeot
1645*3f2dd94aSFrançois Tigeot i915_gem_object_unpin_map(ce->state->obj);
1646*3f2dd94aSFrançois Tigeot }
1647*3f2dd94aSFrançois Tigeot
1648*3f2dd94aSFrançois Tigeot out:
1649*3f2dd94aSFrançois Tigeot return ret;
1650*3f2dd94aSFrançois Tigeot }
1651*3f2dd94aSFrançois Tigeot
1652*3f2dd94aSFrançois Tigeot static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
1653*3f2dd94aSFrançois Tigeot const struct i915_oa_config *oa_config)
1654*3f2dd94aSFrançois Tigeot {
1655*3f2dd94aSFrançois Tigeot int ret;
1656*3f2dd94aSFrançois Tigeot
1657*3f2dd94aSFrançois Tigeot /*
1658*3f2dd94aSFrançois Tigeot * We disable slice/unslice clock ratio change reports on SKL since
1659*3f2dd94aSFrançois Tigeot * they are too noisy. The HW generates a lot of redundant reports
1660*3f2dd94aSFrançois Tigeot * where the ratio hasn't really changed causing a lot of redundant
1661*3f2dd94aSFrançois Tigeot * work to processes and increasing the chances we'll hit buffer
1662*3f2dd94aSFrançois Tigeot * overruns.
1663*3f2dd94aSFrançois Tigeot *
1664*3f2dd94aSFrançois Tigeot * Although we don't currently use the 'disable overrun' OABUFFER
1665*3f2dd94aSFrançois Tigeot * feature it's worth noting that clock ratio reports have to be
1666*3f2dd94aSFrançois Tigeot * disabled before considering to use that feature since the HW doesn't
1667*3f2dd94aSFrançois Tigeot * correctly block these reports.
1668*3f2dd94aSFrançois Tigeot *
1669*3f2dd94aSFrançois Tigeot * Currently none of the high-level metrics we have depend on knowing
1670*3f2dd94aSFrançois Tigeot * this ratio to normalize.
1671*3f2dd94aSFrançois Tigeot *
1672*3f2dd94aSFrançois Tigeot * Note: This register is not power context saved and restored, but
1673*3f2dd94aSFrançois Tigeot * that's OK considering that we disable RC6 while the OA unit is
1674*3f2dd94aSFrançois Tigeot * enabled.
1675*3f2dd94aSFrançois Tigeot *
1676*3f2dd94aSFrançois Tigeot * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to
1677*3f2dd94aSFrançois Tigeot * be read back from automatically triggered reports, as part of the
1678*3f2dd94aSFrançois Tigeot * RPT_ID field.
1679*3f2dd94aSFrançois Tigeot */
1680*3f2dd94aSFrançois Tigeot if (IS_GEN9(dev_priv)) {
1681*3f2dd94aSFrançois Tigeot I915_WRITE(GEN8_OA_DEBUG,
1682*3f2dd94aSFrançois Tigeot _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
1683*3f2dd94aSFrançois Tigeot GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
1684*3f2dd94aSFrançois Tigeot }
1685*3f2dd94aSFrançois Tigeot
1686*3f2dd94aSFrançois Tigeot /*
1687*3f2dd94aSFrançois Tigeot * Update all contexts prior writing the mux configurations as we need
1688*3f2dd94aSFrançois Tigeot * to make sure all slices/subslices are ON before writing to NOA
1689*3f2dd94aSFrançois Tigeot * registers.
1690*3f2dd94aSFrançois Tigeot */
1691*3f2dd94aSFrançois Tigeot ret = gen8_configure_all_contexts(dev_priv, oa_config);
1692*3f2dd94aSFrançois Tigeot if (ret)
1693*3f2dd94aSFrançois Tigeot return ret;
1694*3f2dd94aSFrançois Tigeot
1695*3f2dd94aSFrançois Tigeot config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
1696*3f2dd94aSFrançois Tigeot
1697*3f2dd94aSFrançois Tigeot config_oa_regs(dev_priv, oa_config->b_counter_regs,
1698*3f2dd94aSFrançois Tigeot oa_config->b_counter_regs_len);
1699*3f2dd94aSFrançois Tigeot
1700*3f2dd94aSFrançois Tigeot return 0;
1701*3f2dd94aSFrançois Tigeot }
1702*3f2dd94aSFrançois Tigeot
1703*3f2dd94aSFrançois Tigeot static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
1704*3f2dd94aSFrançois Tigeot {
1705*3f2dd94aSFrançois Tigeot /* Reset all contexts' slices/subslices configurations. */
1706*3f2dd94aSFrançois Tigeot gen8_configure_all_contexts(dev_priv, NULL);
1707*3f2dd94aSFrançois Tigeot
1708*3f2dd94aSFrançois Tigeot I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
1709*3f2dd94aSFrançois Tigeot ~GT_NOA_ENABLE));
1710*3f2dd94aSFrançois Tigeot
1711*3f2dd94aSFrançois Tigeot }
1712*3f2dd94aSFrançois Tigeot
1713*3f2dd94aSFrançois Tigeot static void gen7_oa_enable(struct drm_i915_private *dev_priv)
1714*3f2dd94aSFrançois Tigeot {
1715*3f2dd94aSFrançois Tigeot /*
1716*3f2dd94aSFrançois Tigeot * Reset buf pointers so we don't forward reports from before now.
1717*3f2dd94aSFrançois Tigeot *
1718*3f2dd94aSFrançois Tigeot * Think carefully if considering trying to avoid this, since it
1719*3f2dd94aSFrançois Tigeot * also ensures status flags and the buffer itself are cleared
1720*3f2dd94aSFrançois Tigeot * in error paths, and we have checks for invalid reports based
1721*3f2dd94aSFrançois Tigeot * on the assumption that certain fields are written to zeroed
1722*3f2dd94aSFrançois Tigeot * memory which this helps maintains.
1723*3f2dd94aSFrançois Tigeot */
1724*3f2dd94aSFrançois Tigeot gen7_init_oa_buffer(dev_priv);
1725a85cb24fSFrançois Tigeot
1726a85cb24fSFrançois Tigeot if (dev_priv->perf.oa.exclusive_stream->enabled) {
1727a85cb24fSFrançois Tigeot struct i915_gem_context *ctx =
1728a85cb24fSFrançois Tigeot dev_priv->perf.oa.exclusive_stream->ctx;
1729a85cb24fSFrançois Tigeot u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
1730a85cb24fSFrançois Tigeot
1731a85cb24fSFrançois Tigeot bool periodic = dev_priv->perf.oa.periodic;
1732a85cb24fSFrançois Tigeot u32 period_exponent = dev_priv->perf.oa.period_exponent;
1733a85cb24fSFrançois Tigeot u32 report_format = dev_priv->perf.oa.oa_buffer.format;
1734a85cb24fSFrançois Tigeot
1735a85cb24fSFrançois Tigeot I915_WRITE(GEN7_OACONTROL,
1736a85cb24fSFrançois Tigeot (ctx_id & GEN7_OACONTROL_CTX_MASK) |
1737a85cb24fSFrançois Tigeot (period_exponent <<
1738a85cb24fSFrançois Tigeot GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
1739a85cb24fSFrançois Tigeot (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
1740a85cb24fSFrançois Tigeot (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
1741a85cb24fSFrançois Tigeot (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
1742a85cb24fSFrançois Tigeot GEN7_OACONTROL_ENABLE);
1743a85cb24fSFrançois Tigeot } else
1744a85cb24fSFrançois Tigeot I915_WRITE(GEN7_OACONTROL, 0);
1745a85cb24fSFrançois Tigeot }
1746a85cb24fSFrançois Tigeot
1747*3f2dd94aSFrançois Tigeot static void gen8_oa_enable(struct drm_i915_private *dev_priv)
1748a85cb24fSFrançois Tigeot {
1749*3f2dd94aSFrançois Tigeot u32 report_format = dev_priv->perf.oa.oa_buffer.format;
1750a85cb24fSFrançois Tigeot
1751*3f2dd94aSFrançois Tigeot /*
1752*3f2dd94aSFrançois Tigeot * Reset buf pointers so we don't forward reports from before now.
1753a85cb24fSFrançois Tigeot *
1754a85cb24fSFrançois Tigeot * Think carefully if considering trying to avoid this, since it
1755a85cb24fSFrançois Tigeot * also ensures status flags and the buffer itself are cleared
1756a85cb24fSFrançois Tigeot * in error paths, and we have checks for invalid reports based
1757a85cb24fSFrançois Tigeot * on the assumption that certain fields are written to zeroed
1758a85cb24fSFrançois Tigeot * memory which this helps maintains.
1759a85cb24fSFrançois Tigeot */
1760*3f2dd94aSFrançois Tigeot gen8_init_oa_buffer(dev_priv);
1761a85cb24fSFrançois Tigeot
1762*3f2dd94aSFrançois Tigeot /*
1763*3f2dd94aSFrançois Tigeot * Note: we don't rely on the hardware to perform single context
1764*3f2dd94aSFrançois Tigeot * filtering and instead filter on the cpu based on the context-id
1765*3f2dd94aSFrançois Tigeot * field of reports
1766*3f2dd94aSFrançois Tigeot */
1767*3f2dd94aSFrançois Tigeot I915_WRITE(GEN8_OACONTROL, (report_format <<
1768*3f2dd94aSFrançois Tigeot GEN8_OA_REPORT_FORMAT_SHIFT) |
1769*3f2dd94aSFrançois Tigeot GEN8_OA_COUNTER_ENABLE);
1770a85cb24fSFrançois Tigeot }
1771a85cb24fSFrançois Tigeot
1772a85cb24fSFrançois Tigeot /**
1773a85cb24fSFrançois Tigeot * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
1774a85cb24fSFrançois Tigeot * @stream: An i915 perf stream opened for OA metrics
1775a85cb24fSFrançois Tigeot *
1776a85cb24fSFrançois Tigeot * [Re]enables hardware periodic sampling according to the period configured
1777a85cb24fSFrançois Tigeot * when opening the stream. This also starts a hrtimer that will periodically
1778a85cb24fSFrançois Tigeot * check for data in the circular OA buffer for notifying userspace (e.g.
1779a85cb24fSFrançois Tigeot * during a read() or poll()).
1780a85cb24fSFrançois Tigeot */
1781a85cb24fSFrançois Tigeot static void i915_oa_stream_enable(struct i915_perf_stream *stream)
1782a85cb24fSFrançois Tigeot {
1783a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
1784a85cb24fSFrançois Tigeot
1785a85cb24fSFrançois Tigeot dev_priv->perf.oa.ops.oa_enable(dev_priv);
1786a85cb24fSFrançois Tigeot
1787a85cb24fSFrançois Tigeot if (dev_priv->perf.oa.periodic)
1788a85cb24fSFrançois Tigeot hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
1789a85cb24fSFrançois Tigeot ns_to_ktime(POLL_PERIOD),
1790a85cb24fSFrançois Tigeot HRTIMER_MODE_REL_PINNED);
1791a85cb24fSFrançois Tigeot }
1792a85cb24fSFrançois Tigeot
1793a85cb24fSFrançois Tigeot static void gen7_oa_disable(struct drm_i915_private *dev_priv)
1794a85cb24fSFrançois Tigeot {
1795a85cb24fSFrançois Tigeot I915_WRITE(GEN7_OACONTROL, 0);
1796a85cb24fSFrançois Tigeot }
1797a85cb24fSFrançois Tigeot
1798*3f2dd94aSFrançois Tigeot static void gen8_oa_disable(struct drm_i915_private *dev_priv)
1799*3f2dd94aSFrançois Tigeot {
1800*3f2dd94aSFrançois Tigeot I915_WRITE(GEN8_OACONTROL, 0);
1801*3f2dd94aSFrançois Tigeot }
1802*3f2dd94aSFrançois Tigeot
1803a85cb24fSFrançois Tigeot /**
1804a85cb24fSFrançois Tigeot * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
1805a85cb24fSFrançois Tigeot * @stream: An i915 perf stream opened for OA metrics
1806a85cb24fSFrançois Tigeot *
1807a85cb24fSFrançois Tigeot * Stops the OA unit from periodically writing counter reports into the
1808a85cb24fSFrançois Tigeot * circular OA buffer. This also stops the hrtimer that periodically checks for
1809a85cb24fSFrançois Tigeot * data in the circular OA buffer, for notifying userspace.
1810a85cb24fSFrançois Tigeot */
1811a85cb24fSFrançois Tigeot static void i915_oa_stream_disable(struct i915_perf_stream *stream)
1812a85cb24fSFrançois Tigeot {
1813a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
1814a85cb24fSFrançois Tigeot
1815a85cb24fSFrançois Tigeot dev_priv->perf.oa.ops.oa_disable(dev_priv);
1816a85cb24fSFrançois Tigeot
1817a85cb24fSFrançois Tigeot if (dev_priv->perf.oa.periodic)
1818a85cb24fSFrançois Tigeot hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
1819a85cb24fSFrançois Tigeot }
1820a85cb24fSFrançois Tigeot
1821a85cb24fSFrançois Tigeot static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
1822a85cb24fSFrançois Tigeot {
1823a85cb24fSFrançois Tigeot return div_u64(1000000000ULL * (2ULL << exponent),
1824a85cb24fSFrançois Tigeot dev_priv->perf.oa.timestamp_frequency);
1825a85cb24fSFrançois Tigeot }
1826a85cb24fSFrançois Tigeot
1827a85cb24fSFrançois Tigeot static const struct i915_perf_stream_ops i915_oa_stream_ops = {
1828a85cb24fSFrançois Tigeot .destroy = i915_oa_stream_destroy,
1829a85cb24fSFrançois Tigeot .enable = i915_oa_stream_enable,
1830a85cb24fSFrançois Tigeot .disable = i915_oa_stream_disable,
1831a85cb24fSFrançois Tigeot .wait_unlocked = i915_oa_wait_unlocked,
1832a85cb24fSFrançois Tigeot .poll_wait = i915_oa_poll_wait,
1833a85cb24fSFrançois Tigeot .read = i915_oa_read,
1834a85cb24fSFrançois Tigeot };
1835a85cb24fSFrançois Tigeot
1836a85cb24fSFrançois Tigeot /**
1837a85cb24fSFrançois Tigeot * i915_oa_stream_init - validate combined props for OA stream and init
1838a85cb24fSFrançois Tigeot * @stream: An i915 perf stream
1839a85cb24fSFrançois Tigeot * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
1840a85cb24fSFrançois Tigeot * @props: The property state that configures stream (individually validated)
1841a85cb24fSFrançois Tigeot *
1842a85cb24fSFrançois Tigeot * While read_properties_unlocked() validates properties in isolation it
1843a85cb24fSFrançois Tigeot * doesn't ensure that the combination necessarily makes sense.
1844a85cb24fSFrançois Tigeot *
1845a85cb24fSFrançois Tigeot * At this point it has been determined that userspace wants a stream of
1846a85cb24fSFrançois Tigeot * OA metrics, but still we need to further validate the combined
1847a85cb24fSFrançois Tigeot * properties are OK.
1848a85cb24fSFrançois Tigeot *
1849a85cb24fSFrançois Tigeot * If the configuration makes sense then we can allocate memory for
1850a85cb24fSFrançois Tigeot * a circular OA buffer and apply the requested metric set configuration.
1851a85cb24fSFrançois Tigeot *
1852a85cb24fSFrançois Tigeot * Returns: zero on success or a negative error code.
1853a85cb24fSFrançois Tigeot */
1854a85cb24fSFrançois Tigeot static int i915_oa_stream_init(struct i915_perf_stream *stream,
1855a85cb24fSFrançois Tigeot struct drm_i915_perf_open_param *param,
1856a85cb24fSFrançois Tigeot struct perf_open_properties *props)
1857a85cb24fSFrançois Tigeot {
1858a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
1859a85cb24fSFrançois Tigeot int format_size;
1860a85cb24fSFrançois Tigeot int ret;
1861a85cb24fSFrançois Tigeot
1862a85cb24fSFrançois Tigeot /* If the sysfs metrics/ directory wasn't registered for some
1863a85cb24fSFrançois Tigeot * reason then don't let userspace try their luck with config
1864a85cb24fSFrançois Tigeot * IDs
1865a85cb24fSFrançois Tigeot */
1866a85cb24fSFrançois Tigeot if (!dev_priv->perf.metrics_kobj) {
1867a85cb24fSFrançois Tigeot DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
1868a85cb24fSFrançois Tigeot return -EINVAL;
1869a85cb24fSFrançois Tigeot }
1870a85cb24fSFrançois Tigeot
1871a85cb24fSFrançois Tigeot if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
1872a85cb24fSFrançois Tigeot DRM_DEBUG("Only OA report sampling supported\n");
1873a85cb24fSFrançois Tigeot return -EINVAL;
1874a85cb24fSFrançois Tigeot }
1875a85cb24fSFrançois Tigeot
1876a85cb24fSFrançois Tigeot if (!dev_priv->perf.oa.ops.init_oa_buffer) {
1877a85cb24fSFrançois Tigeot DRM_DEBUG("OA unit not supported\n");
1878a85cb24fSFrançois Tigeot return -ENODEV;
1879a85cb24fSFrançois Tigeot }
1880a85cb24fSFrançois Tigeot
1881a85cb24fSFrançois Tigeot /* To avoid the complexity of having to accurately filter
1882a85cb24fSFrançois Tigeot * counter reports and marshal to the appropriate client
1883a85cb24fSFrançois Tigeot * we currently only allow exclusive access
1884a85cb24fSFrançois Tigeot */
1885a85cb24fSFrançois Tigeot if (dev_priv->perf.oa.exclusive_stream) {
1886a85cb24fSFrançois Tigeot DRM_DEBUG("OA unit already in use\n");
1887a85cb24fSFrançois Tigeot return -EBUSY;
1888a85cb24fSFrançois Tigeot }
1889a85cb24fSFrançois Tigeot
1890a85cb24fSFrançois Tigeot if (!props->oa_format) {
1891a85cb24fSFrançois Tigeot DRM_DEBUG("OA report format not specified\n");
1892a85cb24fSFrançois Tigeot return -EINVAL;
1893a85cb24fSFrançois Tigeot }
1894a85cb24fSFrançois Tigeot
1895a85cb24fSFrançois Tigeot stream->sample_size = sizeof(struct drm_i915_perf_record_header);
1896a85cb24fSFrançois Tigeot
1897a85cb24fSFrançois Tigeot format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
1898a85cb24fSFrançois Tigeot
1899a85cb24fSFrançois Tigeot stream->sample_flags |= SAMPLE_OA_REPORT;
1900a85cb24fSFrançois Tigeot stream->sample_size += format_size;
1901a85cb24fSFrançois Tigeot
1902a85cb24fSFrançois Tigeot dev_priv->perf.oa.oa_buffer.format_size = format_size;
1903a85cb24fSFrançois Tigeot if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
1904a85cb24fSFrançois Tigeot return -EINVAL;
1905a85cb24fSFrançois Tigeot
1906a85cb24fSFrançois Tigeot dev_priv->perf.oa.oa_buffer.format =
1907a85cb24fSFrançois Tigeot dev_priv->perf.oa.oa_formats[props->oa_format].format;
1908a85cb24fSFrançois Tigeot
1909a85cb24fSFrançois Tigeot dev_priv->perf.oa.periodic = props->oa_periodic;
1910a85cb24fSFrançois Tigeot if (dev_priv->perf.oa.periodic) {
1911a85cb24fSFrançois Tigeot u32 tail;
1912a85cb24fSFrançois Tigeot
1913a85cb24fSFrançois Tigeot dev_priv->perf.oa.period_exponent = props->oa_period_exponent;
1914a85cb24fSFrançois Tigeot
1915a85cb24fSFrançois Tigeot /* See comment for OA_TAIL_MARGIN_NSEC for details
1916a85cb24fSFrançois Tigeot * about this tail_margin...
1917a85cb24fSFrançois Tigeot */
1918a85cb24fSFrançois Tigeot tail = div64_u64(OA_TAIL_MARGIN_NSEC,
1919a85cb24fSFrançois Tigeot oa_exponent_to_ns(dev_priv,
1920a85cb24fSFrançois Tigeot props->oa_period_exponent));
1921a85cb24fSFrançois Tigeot dev_priv->perf.oa.tail_margin = (tail + 1) * format_size;
1922a85cb24fSFrançois Tigeot }
1923a85cb24fSFrançois Tigeot
1924a85cb24fSFrançois Tigeot if (stream->ctx) {
1925a85cb24fSFrançois Tigeot ret = oa_get_render_ctx_id(stream);
1926a85cb24fSFrançois Tigeot if (ret)
1927a85cb24fSFrançois Tigeot return ret;
1928a85cb24fSFrançois Tigeot }
1929a85cb24fSFrançois Tigeot
1930*3f2dd94aSFrançois Tigeot ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
1931*3f2dd94aSFrançois Tigeot if (ret)
1932*3f2dd94aSFrançois Tigeot goto err_config;
1933*3f2dd94aSFrançois Tigeot
1934a85cb24fSFrançois Tigeot /* PRM - observability performance counters:
1935a85cb24fSFrançois Tigeot *
1936a85cb24fSFrançois Tigeot * OACONTROL, performance counter enable, note:
1937a85cb24fSFrançois Tigeot *
1938a85cb24fSFrançois Tigeot * "When this bit is set, in order to have coherent counts,
1939a85cb24fSFrançois Tigeot * RC6 power state and trunk clock gating must be disabled.
1940a85cb24fSFrançois Tigeot * This can be achieved by programming MMIO registers as
1941a85cb24fSFrançois Tigeot * 0xA094=0 and 0xA090[31]=1"
1942a85cb24fSFrançois Tigeot *
1943a85cb24fSFrançois Tigeot * In our case we are expecting that taking pm + FORCEWAKE
1944a85cb24fSFrançois Tigeot * references will effectively disable RC6.
1945a85cb24fSFrançois Tigeot */
1946a85cb24fSFrançois Tigeot intel_runtime_pm_get(dev_priv);
1947a85cb24fSFrançois Tigeot intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
1948a85cb24fSFrançois Tigeot
1949a85cb24fSFrançois Tigeot ret = alloc_oa_buffer(dev_priv);
1950a85cb24fSFrançois Tigeot if (ret)
1951a85cb24fSFrançois Tigeot goto err_oa_buf_alloc;
1952a85cb24fSFrançois Tigeot
1953*3f2dd94aSFrançois Tigeot ret = alloc_oa_buffer(dev_priv);
1954*3f2dd94aSFrançois Tigeot if (ret)
1955*3f2dd94aSFrançois Tigeot goto err_oa_buf_alloc;
1956*3f2dd94aSFrançois Tigeot
1957*3f2dd94aSFrançois Tigeot ret = i915_mutex_lock_interruptible(&dev_priv->drm);
1958*3f2dd94aSFrançois Tigeot if (ret)
1959*3f2dd94aSFrançois Tigeot goto err_lock;
1960*3f2dd94aSFrançois Tigeot
1961*3f2dd94aSFrançois Tigeot ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
1962*3f2dd94aSFrançois Tigeot stream->oa_config);
1963a85cb24fSFrançois Tigeot if (ret)
1964a85cb24fSFrançois Tigeot goto err_enable;
1965a85cb24fSFrançois Tigeot
1966a85cb24fSFrançois Tigeot stream->ops = &i915_oa_stream_ops;
1967a85cb24fSFrançois Tigeot
1968a85cb24fSFrançois Tigeot dev_priv->perf.oa.exclusive_stream = stream;
1969a85cb24fSFrançois Tigeot
1970*3f2dd94aSFrançois Tigeot mutex_unlock(&dev_priv->drm.struct_mutex);
1971*3f2dd94aSFrançois Tigeot
1972a85cb24fSFrançois Tigeot return 0;
1973a85cb24fSFrançois Tigeot
1974a85cb24fSFrançois Tigeot err_enable:
1975*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
1976*3f2dd94aSFrançois Tigeot mutex_unlock(&dev_priv->drm.struct_mutex);
1977*3f2dd94aSFrançois Tigeot
1978*3f2dd94aSFrançois Tigeot err_lock:
1979a85cb24fSFrançois Tigeot free_oa_buffer(dev_priv);
1980a85cb24fSFrançois Tigeot
1981a85cb24fSFrançois Tigeot err_oa_buf_alloc:
1982*3f2dd94aSFrançois Tigeot put_oa_config(dev_priv, stream->oa_config);
1983*3f2dd94aSFrançois Tigeot
1984a85cb24fSFrançois Tigeot intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
1985a85cb24fSFrançois Tigeot intel_runtime_pm_put(dev_priv);
1986*3f2dd94aSFrançois Tigeot
1987*3f2dd94aSFrançois Tigeot err_config:
1988a85cb24fSFrançois Tigeot if (stream->ctx)
1989a85cb24fSFrançois Tigeot oa_put_render_ctx_id(stream);
1990a85cb24fSFrançois Tigeot
1991a85cb24fSFrançois Tigeot return ret;
1992a85cb24fSFrançois Tigeot }
1993*3f2dd94aSFrançois Tigeot #endif
1994a85cb24fSFrançois Tigeot
/*
 * Hook for initialising the OA part of a context's register state.
 *
 * The body is compiled out (#if 0), so this is currently a no-op stub.
 * The disabled code is kept in step with the three-argument
 * gen8_update_reg_state_unlocked() call used by
 * gen8_configure_all_contexts(), so that it can be re-enabled as is.
 *
 * @engine:    engine the context image belongs to; only RCS is handled
 * @ctx:       context whose register state is being initialised
 * @reg_state: pointer to the context's register state
 */
void i915_oa_init_reg_state(struct intel_engine_cs *engine,
			    struct i915_gem_context *ctx,
			    u32 *reg_state)
{
#if 0
	struct drm_i915_private *dev_priv = engine->i915;
	struct i915_perf_stream *stream = dev_priv->perf.oa.exclusive_stream;

	if (engine->id != RCS)
		return;

	if (!dev_priv->perf.initialized)
		return;

	/*
	 * With no exclusive stream there is no OA config to apply; NULL is
	 * passed, as gen8_disable_metric_set() does for existing contexts.
	 */
	gen8_update_reg_state_unlocked(ctx, reg_state,
				       stream ? stream->oa_config : NULL);
#endif
}
2012*3f2dd94aSFrançois Tigeot
2013*3f2dd94aSFrançois Tigeot #if 0
2014a85cb24fSFrançois Tigeot /**
2015a85cb24fSFrançois Tigeot * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
2016a85cb24fSFrançois Tigeot * @stream: An i915 perf stream
2017a85cb24fSFrançois Tigeot * @file: An i915 perf stream file
2018a85cb24fSFrançois Tigeot * @buf: destination buffer given by userspace
2019a85cb24fSFrançois Tigeot * @count: the number of bytes userspace wants to read
2020a85cb24fSFrançois Tigeot * @ppos: (inout) file seek position (unused)
2021a85cb24fSFrançois Tigeot *
2022a85cb24fSFrançois Tigeot * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
2023a85cb24fSFrançois Tigeot * ensure that if we've successfully copied any data then reporting that takes
2024a85cb24fSFrançois Tigeot * precedence over any internal error status, so the data isn't lost.
2025a85cb24fSFrançois Tigeot *
2026a85cb24fSFrançois Tigeot * For example ret will be -ENOSPC whenever there is more buffered data than
2027a85cb24fSFrançois Tigeot * can be copied to userspace, but that's only interesting if we weren't able
2028a85cb24fSFrançois Tigeot * to copy some data because it implies the userspace buffer is too small to
2029a85cb24fSFrançois Tigeot * receive a single record (and we never split records).
2030a85cb24fSFrançois Tigeot *
2031a85cb24fSFrançois Tigeot * Another case with ret == -EFAULT is more of a grey area since it would seem
2032a85cb24fSFrançois Tigeot * like bad form for userspace to ask us to overrun its buffer, but the user
2033a85cb24fSFrançois Tigeot * knows best:
2034a85cb24fSFrançois Tigeot *
2035a85cb24fSFrançois Tigeot * http://yarchive.net/comp/linux/partial_reads_writes.html
2036a85cb24fSFrançois Tigeot *
2037a85cb24fSFrançois Tigeot * Returns: The number of bytes copied or a negative error code on failure.
2038a85cb24fSFrançois Tigeot */
2039a85cb24fSFrançois Tigeot static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
2040a85cb24fSFrançois Tigeot struct file *file,
2041a85cb24fSFrançois Tigeot char __user *buf,
2042a85cb24fSFrançois Tigeot size_t count,
2043a85cb24fSFrançois Tigeot loff_t *ppos)
2044a85cb24fSFrançois Tigeot {
2045a85cb24fSFrançois Tigeot /* Note we keep the offset (aka bytes read) separate from any
2046a85cb24fSFrançois Tigeot * error status so that the final check for whether we return
2047a85cb24fSFrançois Tigeot * the bytes read with a higher precedence than any error (see
2048a85cb24fSFrançois Tigeot * comment below) doesn't need to be handled/duplicated in
2049a85cb24fSFrançois Tigeot * stream->ops->read() implementations.
2050a85cb24fSFrançois Tigeot */
2051a85cb24fSFrançois Tigeot size_t offset = 0;
2052a85cb24fSFrançois Tigeot int ret = stream->ops->read(stream, buf, count, &offset);
2053a85cb24fSFrançois Tigeot
2054a85cb24fSFrançois Tigeot return offset ?: (ret ?: -EAGAIN);
2055a85cb24fSFrançois Tigeot }
2056a85cb24fSFrançois Tigeot
2057a85cb24fSFrançois Tigeot /**
2058a85cb24fSFrançois Tigeot * i915_perf_read - handles read() FOP for i915 perf stream FDs
2059a85cb24fSFrançois Tigeot * @file: An i915 perf stream file
2060a85cb24fSFrançois Tigeot * @buf: destination buffer given by userspace
2061a85cb24fSFrançois Tigeot * @count: the number of bytes userspace wants to read
2062a85cb24fSFrançois Tigeot * @ppos: (inout) file seek position (unused)
2063a85cb24fSFrançois Tigeot *
2064a85cb24fSFrançois Tigeot * The entry point for handling a read() on a stream file descriptor from
2065a85cb24fSFrançois Tigeot * userspace. Most of the work is left to the i915_perf_read_locked() and
2066a85cb24fSFrançois Tigeot * &i915_perf_stream_ops->read but to save having stream implementations (of
2067a85cb24fSFrançois Tigeot * which we might have multiple later) we handle blocking read here.
2068a85cb24fSFrançois Tigeot *
2069a85cb24fSFrançois Tigeot * We can also consistently treat trying to read from a disabled stream
2070a85cb24fSFrançois Tigeot * as an IO error so implementations can assume the stream is enabled
2071a85cb24fSFrançois Tigeot * while reading.
2072a85cb24fSFrançois Tigeot *
2073a85cb24fSFrançois Tigeot * Returns: The number of bytes copied or a negative error code on failure.
2074a85cb24fSFrançois Tigeot */
2075a85cb24fSFrançois Tigeot static ssize_t i915_perf_read(struct file *file,
2076a85cb24fSFrançois Tigeot char __user *buf,
2077a85cb24fSFrançois Tigeot size_t count,
2078a85cb24fSFrançois Tigeot loff_t *ppos)
2079a85cb24fSFrançois Tigeot {
2080a85cb24fSFrançois Tigeot struct i915_perf_stream *stream = file->private_data;
2081a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
2082a85cb24fSFrançois Tigeot ssize_t ret;
2083a85cb24fSFrançois Tigeot
2084a85cb24fSFrançois Tigeot /* To ensure it's handled consistently we simply treat all reads of a
2085a85cb24fSFrançois Tigeot * disabled stream as an error. In particular it might otherwise lead
2086a85cb24fSFrançois Tigeot * to a deadlock for blocking file descriptors...
2087a85cb24fSFrançois Tigeot */
2088a85cb24fSFrançois Tigeot if (!stream->enabled)
2089a85cb24fSFrançois Tigeot return -EIO;
2090a85cb24fSFrançois Tigeot
2091a85cb24fSFrançois Tigeot if (!(file->f_flags & O_NONBLOCK)) {
2092a85cb24fSFrançois Tigeot /* There's the small chance of false positives from
2093a85cb24fSFrançois Tigeot * stream->ops->wait_unlocked.
2094a85cb24fSFrançois Tigeot *
2095a85cb24fSFrançois Tigeot * E.g. with single context filtering since we only wait until
2096a85cb24fSFrançois Tigeot * oabuffer has >= 1 report we don't immediately know whether
2097a85cb24fSFrançois Tigeot * any reports really belong to the current context
2098a85cb24fSFrançois Tigeot */
2099a85cb24fSFrançois Tigeot do {
2100a85cb24fSFrançois Tigeot ret = stream->ops->wait_unlocked(stream);
2101a85cb24fSFrançois Tigeot if (ret)
2102a85cb24fSFrançois Tigeot return ret;
2103a85cb24fSFrançois Tigeot
2104a85cb24fSFrançois Tigeot mutex_lock(&dev_priv->perf.lock);
2105a85cb24fSFrançois Tigeot ret = i915_perf_read_locked(stream, file,
2106a85cb24fSFrançois Tigeot buf, count, ppos);
2107a85cb24fSFrançois Tigeot mutex_unlock(&dev_priv->perf.lock);
2108a85cb24fSFrançois Tigeot } while (ret == -EAGAIN);
2109a85cb24fSFrançois Tigeot } else {
2110a85cb24fSFrançois Tigeot mutex_lock(&dev_priv->perf.lock);
2111a85cb24fSFrançois Tigeot ret = i915_perf_read_locked(stream, file, buf, count, ppos);
2112a85cb24fSFrançois Tigeot mutex_unlock(&dev_priv->perf.lock);
2113a85cb24fSFrançois Tigeot }
2114a85cb24fSFrançois Tigeot
2115a85cb24fSFrançois Tigeot if (ret >= 0) {
2116a85cb24fSFrançois Tigeot /* Maybe make ->pollin per-stream state if we support multiple
2117a85cb24fSFrançois Tigeot * concurrent streams in the future.
2118a85cb24fSFrançois Tigeot */
2119a85cb24fSFrançois Tigeot dev_priv->perf.oa.pollin = false;
2120a85cb24fSFrançois Tigeot }
2121a85cb24fSFrançois Tigeot
2122a85cb24fSFrançois Tigeot return ret;
2123a85cb24fSFrançois Tigeot }
2124a85cb24fSFrançois Tigeot
2125a85cb24fSFrançois Tigeot static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
2126a85cb24fSFrançois Tigeot {
2127a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv =
2128a85cb24fSFrançois Tigeot container_of(hrtimer, typeof(*dev_priv),
2129a85cb24fSFrançois Tigeot perf.oa.poll_check_timer);
2130a85cb24fSFrançois Tigeot
2131a85cb24fSFrançois Tigeot if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) {
2132a85cb24fSFrançois Tigeot dev_priv->perf.oa.pollin = true;
2133a85cb24fSFrançois Tigeot wake_up(&dev_priv->perf.oa.poll_wq);
2134a85cb24fSFrançois Tigeot }
2135a85cb24fSFrançois Tigeot
2136a85cb24fSFrançois Tigeot hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
2137a85cb24fSFrançois Tigeot
2138a85cb24fSFrançois Tigeot return HRTIMER_RESTART;
2139a85cb24fSFrançois Tigeot }
2140a85cb24fSFrançois Tigeot
2141a85cb24fSFrançois Tigeot /**
2142a85cb24fSFrançois Tigeot * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
2143a85cb24fSFrançois Tigeot * @dev_priv: i915 device instance
2144a85cb24fSFrançois Tigeot * @stream: An i915 perf stream
2145a85cb24fSFrançois Tigeot * @file: An i915 perf stream file
2146a85cb24fSFrançois Tigeot * @wait: poll() state table
2147a85cb24fSFrançois Tigeot *
2148a85cb24fSFrançois Tigeot * For handling userspace polling on an i915 perf stream, this calls through to
2149a85cb24fSFrançois Tigeot * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
2150a85cb24fSFrançois Tigeot * will be woken for new stream data.
2151a85cb24fSFrançois Tigeot *
2152a85cb24fSFrançois Tigeot * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
2153a85cb24fSFrançois Tigeot * with any non-file-operation driver hooks.
2154a85cb24fSFrançois Tigeot *
2155a85cb24fSFrançois Tigeot * Returns: any poll events that are ready without sleeping
2156a85cb24fSFrançois Tigeot */
2157a85cb24fSFrançois Tigeot static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv,
2158a85cb24fSFrançois Tigeot struct i915_perf_stream *stream,
2159a85cb24fSFrançois Tigeot struct file *file,
2160a85cb24fSFrançois Tigeot poll_table *wait)
2161a85cb24fSFrançois Tigeot {
2162a85cb24fSFrançois Tigeot unsigned int events = 0;
2163a85cb24fSFrançois Tigeot
2164a85cb24fSFrançois Tigeot stream->ops->poll_wait(stream, file, wait);
2165a85cb24fSFrançois Tigeot
2166a85cb24fSFrançois Tigeot /* Note: we don't explicitly check whether there's something to read
2167a85cb24fSFrançois Tigeot * here since this path may be very hot depending on what else
2168a85cb24fSFrançois Tigeot * userspace is polling, or on the timeout in use. We rely solely on
2169a85cb24fSFrançois Tigeot * the hrtimer/oa_poll_check_timer_cb to notify us when there are
2170a85cb24fSFrançois Tigeot * samples to read.
2171a85cb24fSFrançois Tigeot */
2172a85cb24fSFrançois Tigeot if (dev_priv->perf.oa.pollin)
2173a85cb24fSFrançois Tigeot events |= POLLIN;
2174a85cb24fSFrançois Tigeot
2175a85cb24fSFrançois Tigeot return events;
2176a85cb24fSFrançois Tigeot }
2177a85cb24fSFrançois Tigeot
2178a85cb24fSFrançois Tigeot /**
2179a85cb24fSFrançois Tigeot * i915_perf_poll - call poll_wait() with a suitable wait queue for stream
2180a85cb24fSFrançois Tigeot * @file: An i915 perf stream file
2181a85cb24fSFrançois Tigeot * @wait: poll() state table
2182a85cb24fSFrançois Tigeot *
2183a85cb24fSFrançois Tigeot * For handling userspace polling on an i915 perf stream, this ensures
2184a85cb24fSFrançois Tigeot * poll_wait() gets called with a wait queue that will be woken for new stream
2185a85cb24fSFrançois Tigeot * data.
2186a85cb24fSFrançois Tigeot *
2187a85cb24fSFrançois Tigeot * Note: Implementation deferred to i915_perf_poll_locked()
2188a85cb24fSFrançois Tigeot *
2189a85cb24fSFrançois Tigeot * Returns: any poll events that are ready without sleeping
2190a85cb24fSFrançois Tigeot */
2191a85cb24fSFrançois Tigeot static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
2192a85cb24fSFrançois Tigeot {
2193a85cb24fSFrançois Tigeot struct i915_perf_stream *stream = file->private_data;
2194a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
2195a85cb24fSFrançois Tigeot int ret;
2196a85cb24fSFrançois Tigeot
2197a85cb24fSFrançois Tigeot mutex_lock(&dev_priv->perf.lock);
2198a85cb24fSFrançois Tigeot ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
2199a85cb24fSFrançois Tigeot mutex_unlock(&dev_priv->perf.lock);
2200a85cb24fSFrançois Tigeot
2201a85cb24fSFrançois Tigeot return ret;
2202a85cb24fSFrançois Tigeot }
2203a85cb24fSFrançois Tigeot
2204a85cb24fSFrançois Tigeot /**
2205a85cb24fSFrançois Tigeot * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
2206a85cb24fSFrançois Tigeot * @stream: A disabled i915 perf stream
2207a85cb24fSFrançois Tigeot *
2208a85cb24fSFrançois Tigeot * [Re]enables the associated capture of data for this stream.
2209a85cb24fSFrançois Tigeot *
2210a85cb24fSFrançois Tigeot * If a stream was previously enabled then there's currently no intention
2211a85cb24fSFrançois Tigeot * to provide userspace any guarantee about the preservation of previously
2212a85cb24fSFrançois Tigeot * buffered data.
2213a85cb24fSFrançois Tigeot */
2214a85cb24fSFrançois Tigeot static void i915_perf_enable_locked(struct i915_perf_stream *stream)
2215a85cb24fSFrançois Tigeot {
2216a85cb24fSFrançois Tigeot if (stream->enabled)
2217a85cb24fSFrançois Tigeot return;
2218a85cb24fSFrançois Tigeot
2219a85cb24fSFrançois Tigeot /* Allow stream->ops->enable() to refer to this */
2220a85cb24fSFrançois Tigeot stream->enabled = true;
2221a85cb24fSFrançois Tigeot
2222a85cb24fSFrançois Tigeot if (stream->ops->enable)
2223a85cb24fSFrançois Tigeot stream->ops->enable(stream);
2224a85cb24fSFrançois Tigeot }
2225a85cb24fSFrançois Tigeot
2226a85cb24fSFrançois Tigeot /**
2227a85cb24fSFrançois Tigeot * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl
2228a85cb24fSFrançois Tigeot * @stream: An enabled i915 perf stream
2229a85cb24fSFrançois Tigeot *
2230a85cb24fSFrançois Tigeot * Disables the associated capture of data for this stream.
2231a85cb24fSFrançois Tigeot *
2232a85cb24fSFrançois Tigeot * The intention is that disabling an re-enabling a stream will ideally be
2233a85cb24fSFrançois Tigeot * cheaper than destroying and re-opening a stream with the same configuration,
2234a85cb24fSFrançois Tigeot * though there are no formal guarantees about what state or buffered data
2235a85cb24fSFrançois Tigeot * must be retained between disabling and re-enabling a stream.
2236a85cb24fSFrançois Tigeot *
2237a85cb24fSFrançois Tigeot * Note: while a stream is disabled it's considered an error for userspace
2238a85cb24fSFrançois Tigeot * to attempt to read from the stream (-EIO).
2239a85cb24fSFrançois Tigeot */
2240a85cb24fSFrançois Tigeot static void i915_perf_disable_locked(struct i915_perf_stream *stream)
2241a85cb24fSFrançois Tigeot {
2242a85cb24fSFrançois Tigeot if (!stream->enabled)
2243a85cb24fSFrançois Tigeot return;
2244a85cb24fSFrançois Tigeot
2245a85cb24fSFrançois Tigeot /* Allow stream->ops->disable() to refer to this */
2246a85cb24fSFrançois Tigeot stream->enabled = false;
2247a85cb24fSFrançois Tigeot
2248a85cb24fSFrançois Tigeot if (stream->ops->disable)
2249a85cb24fSFrançois Tigeot stream->ops->disable(stream);
2250a85cb24fSFrançois Tigeot }
2251a85cb24fSFrançois Tigeot
2252a85cb24fSFrançois Tigeot /**
2253a85cb24fSFrançois Tigeot * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
2254a85cb24fSFrançois Tigeot * @stream: An i915 perf stream
2255a85cb24fSFrançois Tigeot * @cmd: the ioctl request
2256a85cb24fSFrançois Tigeot * @arg: the ioctl data
2257a85cb24fSFrançois Tigeot *
2258a85cb24fSFrançois Tigeot * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
2259a85cb24fSFrançois Tigeot * with any non-file-operation driver hooks.
2260a85cb24fSFrançois Tigeot *
2261a85cb24fSFrançois Tigeot * Returns: zero on success or a negative error code. Returns -EINVAL for
2262a85cb24fSFrançois Tigeot * an unknown ioctl request.
2263a85cb24fSFrançois Tigeot */
2264a85cb24fSFrançois Tigeot static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
2265a85cb24fSFrançois Tigeot unsigned int cmd,
2266a85cb24fSFrançois Tigeot unsigned long arg)
2267a85cb24fSFrançois Tigeot {
2268a85cb24fSFrançois Tigeot switch (cmd) {
2269a85cb24fSFrançois Tigeot case I915_PERF_IOCTL_ENABLE:
2270a85cb24fSFrançois Tigeot i915_perf_enable_locked(stream);
2271a85cb24fSFrançois Tigeot return 0;
2272a85cb24fSFrançois Tigeot case I915_PERF_IOCTL_DISABLE:
2273a85cb24fSFrançois Tigeot i915_perf_disable_locked(stream);
2274a85cb24fSFrançois Tigeot return 0;
2275a85cb24fSFrançois Tigeot }
2276a85cb24fSFrançois Tigeot
2277a85cb24fSFrançois Tigeot return -EINVAL;
2278a85cb24fSFrançois Tigeot }
2279a85cb24fSFrançois Tigeot
2280a85cb24fSFrançois Tigeot /**
2281a85cb24fSFrançois Tigeot * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
2282a85cb24fSFrançois Tigeot * @file: An i915 perf stream file
2283a85cb24fSFrançois Tigeot * @cmd: the ioctl request
2284a85cb24fSFrançois Tigeot * @arg: the ioctl data
2285a85cb24fSFrançois Tigeot *
2286a85cb24fSFrançois Tigeot * Implementation deferred to i915_perf_ioctl_locked().
2287a85cb24fSFrançois Tigeot *
2288a85cb24fSFrançois Tigeot * Returns: zero on success or a negative error code. Returns -EINVAL for
2289a85cb24fSFrançois Tigeot * an unknown ioctl request.
2290a85cb24fSFrançois Tigeot */
2291a85cb24fSFrançois Tigeot static long i915_perf_ioctl(struct file *file,
2292a85cb24fSFrançois Tigeot unsigned int cmd,
2293a85cb24fSFrançois Tigeot unsigned long arg)
2294a85cb24fSFrançois Tigeot {
2295a85cb24fSFrançois Tigeot struct i915_perf_stream *stream = file->private_data;
2296a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
2297a85cb24fSFrançois Tigeot long ret;
2298a85cb24fSFrançois Tigeot
2299a85cb24fSFrançois Tigeot mutex_lock(&dev_priv->perf.lock);
2300a85cb24fSFrançois Tigeot ret = i915_perf_ioctl_locked(stream, cmd, arg);
2301a85cb24fSFrançois Tigeot mutex_unlock(&dev_priv->perf.lock);
2302a85cb24fSFrançois Tigeot
2303a85cb24fSFrançois Tigeot return ret;
2304a85cb24fSFrançois Tigeot }
2305a85cb24fSFrançois Tigeot
2306a85cb24fSFrançois Tigeot /**
2307a85cb24fSFrançois Tigeot * i915_perf_destroy_locked - destroy an i915 perf stream
2308a85cb24fSFrançois Tigeot * @stream: An i915 perf stream
2309a85cb24fSFrançois Tigeot *
2310a85cb24fSFrançois Tigeot * Frees all resources associated with the given i915 perf @stream, disabling
2311a85cb24fSFrançois Tigeot * any associated data capture in the process.
2312a85cb24fSFrançois Tigeot *
2313a85cb24fSFrançois Tigeot * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
2314a85cb24fSFrançois Tigeot * with any non-file-operation driver hooks.
2315a85cb24fSFrançois Tigeot */
2316a85cb24fSFrançois Tigeot static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
2317a85cb24fSFrançois Tigeot {
2318a85cb24fSFrançois Tigeot if (stream->enabled)
2319a85cb24fSFrançois Tigeot i915_perf_disable_locked(stream);
2320a85cb24fSFrançois Tigeot
2321a85cb24fSFrançois Tigeot if (stream->ops->destroy)
2322a85cb24fSFrançois Tigeot stream->ops->destroy(stream);
2323a85cb24fSFrançois Tigeot
2324a85cb24fSFrançois Tigeot list_del(&stream->link);
2325a85cb24fSFrançois Tigeot
2326a85cb24fSFrançois Tigeot if (stream->ctx)
2327*3f2dd94aSFrançois Tigeot i915_gem_context_put(stream->ctx);
2328a85cb24fSFrançois Tigeot
2329a85cb24fSFrançois Tigeot kfree(stream);
2330a85cb24fSFrançois Tigeot }
2331a85cb24fSFrançois Tigeot
2332a85cb24fSFrançois Tigeot /**
2333a85cb24fSFrançois Tigeot * i915_perf_release - handles userspace close() of a stream file
2334a85cb24fSFrançois Tigeot * @inode: anonymous inode associated with file
2335a85cb24fSFrançois Tigeot * @file: An i915 perf stream file
2336a85cb24fSFrançois Tigeot *
2337a85cb24fSFrançois Tigeot * Cleans up any resources associated with an open i915 perf stream file.
2338a85cb24fSFrançois Tigeot *
2339a85cb24fSFrançois Tigeot * NB: close() can't really fail from the userspace point of view.
2340a85cb24fSFrançois Tigeot *
2341a85cb24fSFrançois Tigeot * Returns: zero on success or a negative error code.
2342a85cb24fSFrançois Tigeot */
2343a85cb24fSFrançois Tigeot static int i915_perf_release(struct inode *inode, struct file *file)
2344a85cb24fSFrançois Tigeot {
2345a85cb24fSFrançois Tigeot struct i915_perf_stream *stream = file->private_data;
2346a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = stream->dev_priv;
2347a85cb24fSFrançois Tigeot
2348a85cb24fSFrançois Tigeot mutex_lock(&dev_priv->perf.lock);
2349a85cb24fSFrançois Tigeot i915_perf_destroy_locked(stream);
2350a85cb24fSFrançois Tigeot mutex_unlock(&dev_priv->perf.lock);
2351a85cb24fSFrançois Tigeot
2352a85cb24fSFrançois Tigeot return 0;
2353a85cb24fSFrançois Tigeot }
2354a85cb24fSFrançois Tigeot
2355a85cb24fSFrançois Tigeot
2356a85cb24fSFrançois Tigeot static const struct file_operations fops = {
2357a85cb24fSFrançois Tigeot .owner = THIS_MODULE,
2358a85cb24fSFrançois Tigeot .llseek = no_llseek,
2359a85cb24fSFrançois Tigeot .release = i915_perf_release,
2360a85cb24fSFrançois Tigeot .poll = i915_perf_poll,
2361a85cb24fSFrançois Tigeot .read = i915_perf_read,
2362a85cb24fSFrançois Tigeot .unlocked_ioctl = i915_perf_ioctl,
2363*3f2dd94aSFrançois Tigeot /* Our ioctl have no arguments, so it's safe to use the same function
2364*3f2dd94aSFrançois Tigeot * to handle 32bits compatibility.
2365*3f2dd94aSFrançois Tigeot */
2366*3f2dd94aSFrançois Tigeot .compat_ioctl = i915_perf_ioctl,
2367a85cb24fSFrançois Tigeot };
2368a85cb24fSFrançois Tigeot
2369a85cb24fSFrançois Tigeot
2370a85cb24fSFrançois Tigeot /**
2371a85cb24fSFrançois Tigeot * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
2372a85cb24fSFrançois Tigeot * @dev_priv: i915 device instance
2373a85cb24fSFrançois Tigeot * @param: The open parameters passed to 'DRM_I915_PERF_OPEN`
2374a85cb24fSFrançois Tigeot * @props: individually validated u64 property value pairs
2375a85cb24fSFrançois Tigeot * @file: drm file
2376a85cb24fSFrançois Tigeot *
2377a85cb24fSFrançois Tigeot * See i915_perf_ioctl_open() for interface details.
2378a85cb24fSFrançois Tigeot *
2379a85cb24fSFrançois Tigeot * Implements further stream config validation and stream initialization on
2380a85cb24fSFrançois Tigeot * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex
2381a85cb24fSFrançois Tigeot * taken to serialize with any non-file-operation driver hooks.
2382a85cb24fSFrançois Tigeot *
2383a85cb24fSFrançois Tigeot * Note: at this point the @props have only been validated in isolation and
2384a85cb24fSFrançois Tigeot * it's still necessary to validate that the combination of properties makes
2385a85cb24fSFrançois Tigeot * sense.
2386a85cb24fSFrançois Tigeot *
2387a85cb24fSFrançois Tigeot * In the case where userspace is interested in OA unit metrics then further
2388a85cb24fSFrançois Tigeot * config validation and stream initialization details will be handled by
2389a85cb24fSFrançois Tigeot * i915_oa_stream_init(). The code here should only validate config state that
2390a85cb24fSFrançois Tigeot * will be relevant to all stream types / backends.
2391a85cb24fSFrançois Tigeot *
2392a85cb24fSFrançois Tigeot * Returns: zero on success or a negative error code.
2393a85cb24fSFrançois Tigeot */
2394a85cb24fSFrançois Tigeot static int
2395a85cb24fSFrançois Tigeot i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
2396a85cb24fSFrançois Tigeot struct drm_i915_perf_open_param *param,
2397a85cb24fSFrançois Tigeot struct perf_open_properties *props,
2398a85cb24fSFrançois Tigeot struct drm_file *file)
2399a85cb24fSFrançois Tigeot {
2400a85cb24fSFrançois Tigeot struct i915_gem_context *specific_ctx = NULL;
2401a85cb24fSFrançois Tigeot struct i915_perf_stream *stream = NULL;
2402a85cb24fSFrançois Tigeot unsigned long f_flags = 0;
2403*3f2dd94aSFrançois Tigeot bool privileged_op = true;
2404a85cb24fSFrançois Tigeot int stream_fd;
2405a85cb24fSFrançois Tigeot int ret;
2406a85cb24fSFrançois Tigeot
2407a85cb24fSFrançois Tigeot if (props->single_context) {
2408a85cb24fSFrançois Tigeot u32 ctx_handle = props->ctx_handle;
2409a85cb24fSFrançois Tigeot struct drm_i915_file_private *file_priv = file->driver_priv;
2410a85cb24fSFrançois Tigeot
2411*3f2dd94aSFrançois Tigeot specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
2412*3f2dd94aSFrançois Tigeot if (!specific_ctx) {
2413a85cb24fSFrançois Tigeot DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
2414a85cb24fSFrançois Tigeot ctx_handle);
2415*3f2dd94aSFrançois Tigeot ret = -ENOENT;
2416a85cb24fSFrançois Tigeot goto err;
2417a85cb24fSFrançois Tigeot }
2418a85cb24fSFrançois Tigeot }
2419a85cb24fSFrançois Tigeot
2420*3f2dd94aSFrançois Tigeot /*
2421*3f2dd94aSFrançois Tigeot * On Haswell the OA unit supports clock gating off for a specific
2422*3f2dd94aSFrançois Tigeot * context and in this mode there's no visibility of metrics for the
2423*3f2dd94aSFrançois Tigeot * rest of the system, which we consider acceptable for a
2424*3f2dd94aSFrançois Tigeot * non-privileged client.
2425*3f2dd94aSFrançois Tigeot *
2426*3f2dd94aSFrançois Tigeot * For Gen8+ the OA unit no longer supports clock gating off for a
2427*3f2dd94aSFrançois Tigeot * specific context and the kernel can't securely stop the counters
2428*3f2dd94aSFrançois Tigeot * from updating as system-wide / global values. Even though we can
2429*3f2dd94aSFrançois Tigeot * filter reports based on the included context ID we can't block
2430*3f2dd94aSFrançois Tigeot * clients from seeing the raw / global counter values via
2431*3f2dd94aSFrançois Tigeot * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
2432*3f2dd94aSFrançois Tigeot * enable the OA unit by default.
2433*3f2dd94aSFrançois Tigeot */
2434*3f2dd94aSFrançois Tigeot if (IS_HASWELL(dev_priv) && specific_ctx)
2435*3f2dd94aSFrançois Tigeot privileged_op = false;
2436*3f2dd94aSFrançois Tigeot
2437a85cb24fSFrançois Tigeot /* Similar to perf's kernel.perf_paranoid_cpu sysctl option
2438a85cb24fSFrançois Tigeot * we check a dev.i915.perf_stream_paranoid sysctl option
2439a85cb24fSFrançois Tigeot * to determine if it's ok to access system wide OA counters
2440a85cb24fSFrançois Tigeot * without CAP_SYS_ADMIN privileges.
2441a85cb24fSFrançois Tigeot */
2442*3f2dd94aSFrançois Tigeot if (privileged_op &&
2443a85cb24fSFrançois Tigeot i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
2444a85cb24fSFrançois Tigeot DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
2445a85cb24fSFrançois Tigeot ret = -EACCES;
2446a85cb24fSFrançois Tigeot goto err_ctx;
2447a85cb24fSFrançois Tigeot }
2448a85cb24fSFrançois Tigeot
2449a85cb24fSFrançois Tigeot stream = kzalloc(sizeof(*stream), GFP_KERNEL);
2450a85cb24fSFrançois Tigeot if (!stream) {
2451a85cb24fSFrançois Tigeot ret = -ENOMEM;
2452a85cb24fSFrançois Tigeot goto err_ctx;
2453a85cb24fSFrançois Tigeot }
2454a85cb24fSFrançois Tigeot
2455a85cb24fSFrançois Tigeot stream->dev_priv = dev_priv;
2456a85cb24fSFrançois Tigeot stream->ctx = specific_ctx;
2457a85cb24fSFrançois Tigeot
2458a85cb24fSFrançois Tigeot ret = i915_oa_stream_init(stream, param, props);
2459a85cb24fSFrançois Tigeot if (ret)
2460a85cb24fSFrançois Tigeot goto err_alloc;
2461a85cb24fSFrançois Tigeot
2462a85cb24fSFrançois Tigeot /* we avoid simply assigning stream->sample_flags = props->sample_flags
2463a85cb24fSFrançois Tigeot * to have _stream_init check the combination of sample flags more
2464a85cb24fSFrançois Tigeot * thoroughly, but still this is the expected result at this point.
2465a85cb24fSFrançois Tigeot */
2466a85cb24fSFrançois Tigeot if (WARN_ON(stream->sample_flags != props->sample_flags)) {
2467a85cb24fSFrançois Tigeot ret = -ENODEV;
2468a85cb24fSFrançois Tigeot goto err_flags;
2469a85cb24fSFrançois Tigeot }
2470a85cb24fSFrançois Tigeot
2471a85cb24fSFrançois Tigeot list_add(&stream->link, &dev_priv->perf.streams);
2472a85cb24fSFrançois Tigeot
2473a85cb24fSFrançois Tigeot if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
2474a85cb24fSFrançois Tigeot f_flags |= O_CLOEXEC;
2475a85cb24fSFrançois Tigeot if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
2476a85cb24fSFrançois Tigeot f_flags |= O_NONBLOCK;
2477a85cb24fSFrançois Tigeot
2478a85cb24fSFrançois Tigeot stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
2479a85cb24fSFrançois Tigeot if (stream_fd < 0) {
2480a85cb24fSFrançois Tigeot ret = stream_fd;
2481a85cb24fSFrançois Tigeot goto err_open;
2482a85cb24fSFrançois Tigeot }
2483a85cb24fSFrançois Tigeot
2484a85cb24fSFrançois Tigeot if (!(param->flags & I915_PERF_FLAG_DISABLED))
2485a85cb24fSFrançois Tigeot i915_perf_enable_locked(stream);
2486a85cb24fSFrançois Tigeot
2487a85cb24fSFrançois Tigeot return stream_fd;
2488a85cb24fSFrançois Tigeot
2489a85cb24fSFrançois Tigeot err_open:
2490a85cb24fSFrançois Tigeot list_del(&stream->link);
2491a85cb24fSFrançois Tigeot err_flags:
2492a85cb24fSFrançois Tigeot if (stream->ops->destroy)
2493a85cb24fSFrançois Tigeot stream->ops->destroy(stream);
2494a85cb24fSFrançois Tigeot err_alloc:
2495a85cb24fSFrançois Tigeot kfree(stream);
2496a85cb24fSFrançois Tigeot err_ctx:
2497a85cb24fSFrançois Tigeot if (specific_ctx)
2498*3f2dd94aSFrançois Tigeot i915_gem_context_put(specific_ctx);
2499a85cb24fSFrançois Tigeot err:
2500a85cb24fSFrançois Tigeot return ret;
2501a85cb24fSFrançois Tigeot }
2502a85cb24fSFrançois Tigeot
2503*3f2dd94aSFrançois Tigeot static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
2504*3f2dd94aSFrançois Tigeot {
2505*3f2dd94aSFrançois Tigeot return div_u64(1000000000ULL * (2ULL << exponent),
2506*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.timestamp_frequency);
2507*3f2dd94aSFrançois Tigeot }
2508*3f2dd94aSFrançois Tigeot
2509a85cb24fSFrançois Tigeot /**
2510a85cb24fSFrançois Tigeot * read_properties_unlocked - validate + copy userspace stream open properties
2511a85cb24fSFrançois Tigeot * @dev_priv: i915 device instance
2512a85cb24fSFrançois Tigeot * @uprops: The array of u64 key value pairs given by userspace
2513a85cb24fSFrançois Tigeot * @n_props: The number of key value pairs expected in @uprops
2514a85cb24fSFrançois Tigeot * @props: The stream configuration built up while validating properties
2515a85cb24fSFrançois Tigeot *
2516a85cb24fSFrançois Tigeot * Note this function only validates properties in isolation it doesn't
2517a85cb24fSFrançois Tigeot * validate that the combination of properties makes sense or that all
2518a85cb24fSFrançois Tigeot * properties necessary for a particular kind of stream have been set.
2519a85cb24fSFrançois Tigeot *
2520a85cb24fSFrançois Tigeot * Note that there currently aren't any ordering requirements for properties so
2521a85cb24fSFrançois Tigeot * we shouldn't validate or assume anything about ordering here. This doesn't
2522a85cb24fSFrançois Tigeot * rule out defining new properties with ordering requirements in the future.
2523a85cb24fSFrançois Tigeot */
2524a85cb24fSFrançois Tigeot static int read_properties_unlocked(struct drm_i915_private *dev_priv,
2525a85cb24fSFrançois Tigeot u64 __user *uprops,
2526a85cb24fSFrançois Tigeot u32 n_props,
2527a85cb24fSFrançois Tigeot struct perf_open_properties *props)
2528a85cb24fSFrançois Tigeot {
2529a85cb24fSFrançois Tigeot u64 __user *uprop = uprops;
2530*3f2dd94aSFrançois Tigeot u32 i;
2531a85cb24fSFrançois Tigeot
2532a85cb24fSFrançois Tigeot memset(props, 0, sizeof(struct perf_open_properties));
2533a85cb24fSFrançois Tigeot
2534a85cb24fSFrançois Tigeot if (!n_props) {
2535a85cb24fSFrançois Tigeot DRM_DEBUG("No i915 perf properties given\n");
2536a85cb24fSFrançois Tigeot return -EINVAL;
2537a85cb24fSFrançois Tigeot }
2538a85cb24fSFrançois Tigeot
2539a85cb24fSFrançois Tigeot /* Considering that ID = 0 is reserved and assuming that we don't
2540a85cb24fSFrançois Tigeot * (currently) expect any configurations to ever specify duplicate
2541a85cb24fSFrançois Tigeot * values for a particular property ID then the last _PROP_MAX value is
2542a85cb24fSFrançois Tigeot * one greater than the maximum number of properties we expect to get
2543a85cb24fSFrançois Tigeot * from userspace.
2544a85cb24fSFrançois Tigeot */
2545a85cb24fSFrançois Tigeot if (n_props >= DRM_I915_PERF_PROP_MAX) {
2546a85cb24fSFrançois Tigeot DRM_DEBUG("More i915 perf properties specified than exist\n");
2547a85cb24fSFrançois Tigeot return -EINVAL;
2548a85cb24fSFrançois Tigeot }
2549a85cb24fSFrançois Tigeot
2550a85cb24fSFrançois Tigeot for (i = 0; i < n_props; i++) {
2551a85cb24fSFrançois Tigeot u64 oa_period, oa_freq_hz;
2552a85cb24fSFrançois Tigeot u64 id, value;
2553a85cb24fSFrançois Tigeot int ret;
2554a85cb24fSFrançois Tigeot
2555a85cb24fSFrançois Tigeot ret = get_user(id, uprop);
2556a85cb24fSFrançois Tigeot if (ret)
2557a85cb24fSFrançois Tigeot return ret;
2558a85cb24fSFrançois Tigeot
2559a85cb24fSFrançois Tigeot ret = get_user(value, uprop + 1);
2560a85cb24fSFrançois Tigeot if (ret)
2561a85cb24fSFrançois Tigeot return ret;
2562a85cb24fSFrançois Tigeot
2563a85cb24fSFrançois Tigeot if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
2564a85cb24fSFrançois Tigeot DRM_DEBUG("Unknown i915 perf property ID\n");
2565a85cb24fSFrançois Tigeot return -EINVAL;
2566a85cb24fSFrançois Tigeot }
2567a85cb24fSFrançois Tigeot
2568a85cb24fSFrançois Tigeot switch ((enum drm_i915_perf_property_id)id) {
2569a85cb24fSFrançois Tigeot case DRM_I915_PERF_PROP_CTX_HANDLE:
2570a85cb24fSFrançois Tigeot props->single_context = 1;
2571a85cb24fSFrançois Tigeot props->ctx_handle = value;
2572a85cb24fSFrançois Tigeot break;
2573a85cb24fSFrançois Tigeot case DRM_I915_PERF_PROP_SAMPLE_OA:
2574a85cb24fSFrançois Tigeot props->sample_flags |= SAMPLE_OA_REPORT;
2575a85cb24fSFrançois Tigeot break;
2576a85cb24fSFrançois Tigeot case DRM_I915_PERF_PROP_OA_METRICS_SET:
2577*3f2dd94aSFrançois Tigeot if (value == 0) {
2578a85cb24fSFrançois Tigeot DRM_DEBUG("Unknown OA metric set ID\n");
2579a85cb24fSFrançois Tigeot return -EINVAL;
2580a85cb24fSFrançois Tigeot }
2581a85cb24fSFrançois Tigeot props->metrics_set = value;
2582a85cb24fSFrançois Tigeot break;
2583a85cb24fSFrançois Tigeot case DRM_I915_PERF_PROP_OA_FORMAT:
2584a85cb24fSFrançois Tigeot if (value == 0 || value >= I915_OA_FORMAT_MAX) {
2585a85cb24fSFrançois Tigeot DRM_DEBUG("Invalid OA report format\n");
2586a85cb24fSFrançois Tigeot return -EINVAL;
2587a85cb24fSFrançois Tigeot }
2588a85cb24fSFrançois Tigeot if (!dev_priv->perf.oa.oa_formats[value].size) {
2589a85cb24fSFrançois Tigeot DRM_DEBUG("Invalid OA report format\n");
2590a85cb24fSFrançois Tigeot return -EINVAL;
2591a85cb24fSFrançois Tigeot }
2592a85cb24fSFrançois Tigeot props->oa_format = value;
2593a85cb24fSFrançois Tigeot break;
2594a85cb24fSFrançois Tigeot case DRM_I915_PERF_PROP_OA_EXPONENT:
2595a85cb24fSFrançois Tigeot if (value > OA_EXPONENT_MAX) {
2596a85cb24fSFrançois Tigeot DRM_DEBUG("OA timer exponent too high (> %u)\n",
2597a85cb24fSFrançois Tigeot OA_EXPONENT_MAX);
2598a85cb24fSFrançois Tigeot return -EINVAL;
2599a85cb24fSFrançois Tigeot }
2600a85cb24fSFrançois Tigeot
2601a85cb24fSFrançois Tigeot /* Theoretically we can program the OA unit to sample
2602*3f2dd94aSFrançois Tigeot * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns
2603*3f2dd94aSFrançois Tigeot * for BXT. We don't allow such high sampling
2604*3f2dd94aSFrançois Tigeot * frequencies by default unless root.
2605a85cb24fSFrançois Tigeot */
2606*3f2dd94aSFrançois Tigeot
2607a85cb24fSFrançois Tigeot BUILD_BUG_ON(sizeof(oa_period) != 8);
2608*3f2dd94aSFrançois Tigeot oa_period = oa_exponent_to_ns(dev_priv, value);
2609a85cb24fSFrançois Tigeot
2610a85cb24fSFrançois Tigeot /* This check is primarily to ensure that oa_period <=
2611a85cb24fSFrançois Tigeot * UINT32_MAX (before passing to do_div which only
2612a85cb24fSFrançois Tigeot * accepts a u32 denominator), but we can also skip
2613a85cb24fSFrançois Tigeot * checking anything < 1Hz which implicitly can't be
2614a85cb24fSFrançois Tigeot * limited via an integer oa_max_sample_rate.
2615a85cb24fSFrançois Tigeot */
2616a85cb24fSFrançois Tigeot if (oa_period <= NSEC_PER_SEC) {
2617a85cb24fSFrançois Tigeot u64 tmp = NSEC_PER_SEC;
2618a85cb24fSFrançois Tigeot do_div(tmp, oa_period);
2619a85cb24fSFrançois Tigeot oa_freq_hz = tmp;
2620a85cb24fSFrançois Tigeot } else
2621a85cb24fSFrançois Tigeot oa_freq_hz = 0;
2622a85cb24fSFrançois Tigeot
2623a85cb24fSFrançois Tigeot if (oa_freq_hz > i915_oa_max_sample_rate &&
2624a85cb24fSFrançois Tigeot !capable(CAP_SYS_ADMIN)) {
2625a85cb24fSFrançois Tigeot DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n",
2626a85cb24fSFrançois Tigeot i915_oa_max_sample_rate);
2627a85cb24fSFrançois Tigeot return -EACCES;
2628a85cb24fSFrançois Tigeot }
2629a85cb24fSFrançois Tigeot
2630a85cb24fSFrançois Tigeot props->oa_periodic = true;
2631a85cb24fSFrançois Tigeot props->oa_period_exponent = value;
2632a85cb24fSFrançois Tigeot break;
2633a85cb24fSFrançois Tigeot case DRM_I915_PERF_PROP_MAX:
2634a85cb24fSFrançois Tigeot MISSING_CASE(id);
2635a85cb24fSFrançois Tigeot return -EINVAL;
2636a85cb24fSFrançois Tigeot }
2637a85cb24fSFrançois Tigeot
2638a85cb24fSFrançois Tigeot uprop += 2;
2639a85cb24fSFrançois Tigeot }
2640a85cb24fSFrançois Tigeot
2641a85cb24fSFrançois Tigeot return 0;
2642a85cb24fSFrançois Tigeot }
2643a85cb24fSFrançois Tigeot #endif
2644a85cb24fSFrançois Tigeot
2645a85cb24fSFrançois Tigeot /**
2646a85cb24fSFrançois Tigeot * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD
2647a85cb24fSFrançois Tigeot * @dev: drm device
2648a85cb24fSFrançois Tigeot * @data: ioctl data copied from userspace (unvalidated)
2649a85cb24fSFrançois Tigeot * @file: drm file
2650a85cb24fSFrançois Tigeot *
2651a85cb24fSFrançois Tigeot * Validates the stream open parameters given by userspace including flags
2652a85cb24fSFrançois Tigeot * and an array of u64 key, value pair properties.
2653a85cb24fSFrançois Tigeot *
2654a85cb24fSFrançois Tigeot * Very little is assumed up front about the nature of the stream being
2655a85cb24fSFrançois Tigeot * opened (for instance we don't assume it's for periodic OA unit metrics). An
2656a85cb24fSFrançois Tigeot * i915-perf stream is expected to be a suitable interface for other forms of
2657a85cb24fSFrançois Tigeot * buffered data written by the GPU besides periodic OA metrics.
2658a85cb24fSFrançois Tigeot *
2659a85cb24fSFrançois Tigeot * Note we copy the properties from userspace outside of the i915 perf
2660a85cb24fSFrançois Tigeot * mutex to avoid an awkward lockdep with mmap_sem.
2661a85cb24fSFrançois Tigeot *
2662a85cb24fSFrançois Tigeot * Most of the implementation details are handled by
2663a85cb24fSFrançois Tigeot * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock
2664a85cb24fSFrançois Tigeot * mutex for serializing with any non-file-operation driver hooks.
2665a85cb24fSFrançois Tigeot *
2666a85cb24fSFrançois Tigeot * Return: A newly opened i915 Perf stream file descriptor or negative
2667a85cb24fSFrançois Tigeot * error code on failure.
2668a85cb24fSFrançois Tigeot */
i915_perf_open_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2669a85cb24fSFrançois Tigeot int i915_perf_open_ioctl(struct drm_device *dev, void *data,
2670a85cb24fSFrançois Tigeot struct drm_file *file)
2671a85cb24fSFrançois Tigeot {
2672a85cb24fSFrançois Tigeot #if 0
2673a85cb24fSFrançois Tigeot struct drm_i915_private *dev_priv = dev->dev_private;
2674a85cb24fSFrançois Tigeot struct drm_i915_perf_open_param *param = data;
2675a85cb24fSFrançois Tigeot struct perf_open_properties props;
2676a85cb24fSFrançois Tigeot u32 known_open_flags;
2677a85cb24fSFrançois Tigeot int ret;
2678a85cb24fSFrançois Tigeot
2679a85cb24fSFrançois Tigeot if (!dev_priv->perf.initialized) {
2680a85cb24fSFrançois Tigeot #endif
2681a85cb24fSFrançois Tigeot DRM_DEBUG("i915 perf interface not available for this system\n");
2682a85cb24fSFrançois Tigeot return -ENOTSUPP;
2683a85cb24fSFrançois Tigeot #if 0
2684a85cb24fSFrançois Tigeot }
2685a85cb24fSFrançois Tigeot
2686a85cb24fSFrançois Tigeot known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
2687a85cb24fSFrançois Tigeot I915_PERF_FLAG_FD_NONBLOCK |
2688a85cb24fSFrançois Tigeot I915_PERF_FLAG_DISABLED;
2689a85cb24fSFrançois Tigeot if (param->flags & ~known_open_flags) {
2690a85cb24fSFrançois Tigeot DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n");
2691a85cb24fSFrançois Tigeot return -EINVAL;
2692a85cb24fSFrançois Tigeot }
2693a85cb24fSFrançois Tigeot
2694a85cb24fSFrançois Tigeot ret = read_properties_unlocked(dev_priv,
2695a85cb24fSFrançois Tigeot u64_to_user_ptr(param->properties_ptr),
2696a85cb24fSFrançois Tigeot param->num_properties,
2697a85cb24fSFrançois Tigeot &props);
2698a85cb24fSFrançois Tigeot if (ret)
2699a85cb24fSFrançois Tigeot return ret;
2700a85cb24fSFrançois Tigeot
2701a85cb24fSFrançois Tigeot mutex_lock(&dev_priv->perf.lock);
2702a85cb24fSFrançois Tigeot ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file);
2703a85cb24fSFrançois Tigeot mutex_unlock(&dev_priv->perf.lock);
2704a85cb24fSFrançois Tigeot
2705a85cb24fSFrançois Tigeot return ret;
2706a85cb24fSFrançois Tigeot #endif
2707a85cb24fSFrançois Tigeot }
2708a85cb24fSFrançois Tigeot
/**
 * i915_perf_register - exposes i915-perf to userspace
 * @dev_priv: i915 device instance
 *
 * In this build the entire body is compiled out, so calling this function
 * has no effect.
 *
 * The disabled implementation creates a "metrics" kobject below the primary
 * DRM device, loads the platform specific test configuration and publishes
 * it as a sysfs group under that kobject, so that userspace can enumerate
 * the IDs usable for opening an i915-perf stream. If no test configuration
 * was loaded (id == 0) or the sysfs group cannot be created, the kobject is
 * dropped again.
 */
void i915_perf_register(struct drm_i915_private *dev_priv)
{
#if 0
	if (!dev_priv->perf.initialized)
		return;

	/*
	 * Registration happens after the device is already visible to
	 * userspace, so hold perf.lock to serialize against a concurrent
	 * i915_perf_open_ioctl().
	 */
	mutex_lock(&dev_priv->perf.lock);

	dev_priv->perf.metrics_kobj =
		kobject_create_and_add("metrics",
				       &dev_priv->drm.primary->kdev->kobj);
	if (!dev_priv->perf.metrics_kobj)
		goto unlock;

	sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr);

	/* Pick the test configuration matching the running platform. */
	if (IS_HASWELL(dev_priv)) {
		i915_perf_load_test_config_hsw(dev_priv);
	} else if (IS_BROADWELL(dev_priv)) {
		i915_perf_load_test_config_bdw(dev_priv);
	} else if (IS_CHERRYVIEW(dev_priv)) {
		i915_perf_load_test_config_chv(dev_priv);
	} else if (IS_SKYLAKE(dev_priv)) {
		if (IS_SKL_GT2(dev_priv))
			i915_perf_load_test_config_sklgt2(dev_priv);
		else if (IS_SKL_GT3(dev_priv))
			i915_perf_load_test_config_sklgt3(dev_priv);
		else if (IS_SKL_GT4(dev_priv))
			i915_perf_load_test_config_sklgt4(dev_priv);
	} else if (IS_BROXTON(dev_priv)) {
		i915_perf_load_test_config_bxt(dev_priv);
	} else if (IS_KABYLAKE(dev_priv)) {
		if (IS_KBL_GT2(dev_priv))
			i915_perf_load_test_config_kblgt2(dev_priv);
		else if (IS_KBL_GT3(dev_priv))
			i915_perf_load_test_config_kblgt3(dev_priv);
	} else if (IS_GEMINILAKE(dev_priv)) {
		i915_perf_load_test_config_glk(dev_priv);
	} else if (IS_COFFEELAKE(dev_priv)) {
		if (IS_CFL_GT2(dev_priv))
			i915_perf_load_test_config_cflgt2(dev_priv);
	}

	/*
	 * Success needs both a loaded configuration (non-zero id) and a
	 * successfully created sysfs group; the group is only attempted
	 * when a configuration was loaded.
	 */
	if (dev_priv->perf.oa.test_config.id != 0 &&
	    sysfs_create_group(dev_priv->perf.metrics_kobj,
			       &dev_priv->perf.oa.test_config.sysfs_metric) == 0) {
		atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);
	} else {
		kobject_put(dev_priv->perf.metrics_kobj);
		dev_priv->perf.metrics_kobj = NULL;
	}

unlock:
	mutex_unlock(&dev_priv->perf.lock);
#endif
}
2786a85cb24fSFrançois Tigeot
2787a85cb24fSFrançois Tigeot /**
2788a85cb24fSFrançois Tigeot * i915_perf_unregister - hide i915-perf from userspace
2789a85cb24fSFrançois Tigeot * @dev_priv: i915 device instance
2790a85cb24fSFrançois Tigeot *
2791a85cb24fSFrançois Tigeot * i915-perf state cleanup is split up into an 'unregister' and
2792a85cb24fSFrançois Tigeot * 'deinit' phase where the interface is first hidden from
2793a85cb24fSFrançois Tigeot * userspace by i915_perf_unregister() before cleaning up
2794a85cb24fSFrançois Tigeot * remaining state in i915_perf_fini().
2795a85cb24fSFrançois Tigeot */
i915_perf_unregister(struct drm_i915_private * dev_priv)2796a85cb24fSFrançois Tigeot void i915_perf_unregister(struct drm_i915_private *dev_priv)
2797a85cb24fSFrançois Tigeot {
2798a85cb24fSFrançois Tigeot if (!IS_HASWELL(dev_priv))
2799a85cb24fSFrançois Tigeot return;
2800a85cb24fSFrançois Tigeot
2801a85cb24fSFrançois Tigeot if (!dev_priv->perf.metrics_kobj)
2802a85cb24fSFrançois Tigeot return;
2803a85cb24fSFrançois Tigeot
2804a85cb24fSFrançois Tigeot #if 0
2805a85cb24fSFrançois Tigeot i915_perf_unregister_sysfs_hsw(dev_priv);
2806a85cb24fSFrançois Tigeot
2807a85cb24fSFrançois Tigeot kobject_put(dev_priv->perf.metrics_kobj);
2808a85cb24fSFrançois Tigeot dev_priv->perf.metrics_kobj = NULL;
2809a85cb24fSFrançois Tigeot #endif
2810a85cb24fSFrançois Tigeot }
2811a85cb24fSFrançois Tigeot
#if 0
/*
 * sysctl tables for the i915 perf tunables. They are chained together as
 * dev_root -> i915_root -> oa_table, i.e. the entries below live under
 * "dev" / "i915" (the debug message for the sample rate limit refers to
 * the result as "dev.i915.oa_max_sample_rate"). Currently compiled out.
 */

/* Leaf entries: the two integer tunables themselves. */
static struct ctl_table oa_table[] = {
	{
		/* Integer backed by i915_perf_stream_paranoid, range zero..one. */
		.procname = "perf_stream_paranoid",
		.data = &i915_perf_stream_paranoid,
		.maxlen = sizeof(i915_perf_stream_paranoid),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &zero,
		.extra2 = &one,
	},
	{
		/*
		 * Integer backed by i915_oa_max_sample_rate, range
		 * zero..oa_sample_rate_hard_limit.
		 */
		.procname = "oa_max_sample_rate",
		.data = &i915_oa_max_sample_rate,
		.maxlen = sizeof(i915_oa_max_sample_rate),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &zero,
		.extra2 = &oa_sample_rate_hard_limit,
	},
	{}	/* terminator */
};

/* "i915" directory holding oa_table. */
static struct ctl_table i915_root[] = {
	{
		.procname = "i915",
		.maxlen = 0,
		.mode = 0555,
		.child = oa_table,
	},
	{}	/* terminator */
};

/* Top-level "dev" directory; this is the table handed to registration. */
static struct ctl_table dev_root[] = {
	{
		.procname = "dev",
		.maxlen = 0,
		.mode = 0555,
		.child = i915_root,
	},
	{}	/* terminator */
};
#endif
2855a85cb24fSFrançois Tigeot
2856a85cb24fSFrançois Tigeot /**
2857a85cb24fSFrançois Tigeot * i915_perf_init - initialize i915-perf state on module load
2858a85cb24fSFrançois Tigeot * @dev_priv: i915 device instance
2859a85cb24fSFrançois Tigeot *
2860a85cb24fSFrançois Tigeot * Initializes i915-perf state without exposing anything to userspace.
2861a85cb24fSFrançois Tigeot *
2862a85cb24fSFrançois Tigeot * Note: i915-perf initialization is split into an 'init' and 'register'
2863a85cb24fSFrançois Tigeot * phase with the i915_perf_register() exposing state to userspace.
2864a85cb24fSFrançois Tigeot */
i915_perf_init(struct drm_i915_private * dev_priv)2865a85cb24fSFrançois Tigeot void i915_perf_init(struct drm_i915_private *dev_priv)
2866a85cb24fSFrançois Tigeot {
2867*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.timestamp_frequency = 0;
2868a85cb24fSFrançois Tigeot
2869a85cb24fSFrançois Tigeot #if 0
2870*3f2dd94aSFrançois Tigeot if (IS_HASWELL(dev_priv)) {
2871*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.is_valid_b_counter_reg =
2872*3f2dd94aSFrançois Tigeot gen7_is_valid_b_counter_addr;
2873*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.is_valid_mux_reg =
2874*3f2dd94aSFrançois Tigeot hsw_is_valid_mux_addr;
2875*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.is_valid_flex_reg = NULL;
2876*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
2877*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
2878*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
2879*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
2880*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
2881*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.read = gen7_oa_read;
2882*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.oa_hw_tail_read =
2883*3f2dd94aSFrançois Tigeot gen7_oa_hw_tail_read;
2884*3f2dd94aSFrançois Tigeot
2885*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.timestamp_frequency = 12500000;
2886*3f2dd94aSFrançois Tigeot
2887*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.oa_formats = hsw_oa_formats;
2888*3f2dd94aSFrançois Tigeot } else if (i915_modparams.enable_execlists) {
2889*3f2dd94aSFrançois Tigeot /* Note: that although we could theoretically also support the
2890*3f2dd94aSFrançois Tigeot * legacy ringbuffer mode on BDW (and earlier iterations of
2891*3f2dd94aSFrançois Tigeot * this driver, before upstreaming did this) it didn't seem
2892*3f2dd94aSFrançois Tigeot * worth the complexity to maintain now that BDW+ enable
2893*3f2dd94aSFrançois Tigeot * execlist mode by default.
2894*3f2dd94aSFrançois Tigeot */
2895*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.is_valid_b_counter_reg =
2896*3f2dd94aSFrançois Tigeot gen7_is_valid_b_counter_addr;
2897*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.is_valid_mux_reg =
2898*3f2dd94aSFrançois Tigeot gen8_is_valid_mux_addr;
2899*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.is_valid_flex_reg =
2900*3f2dd94aSFrançois Tigeot gen8_is_valid_flex_addr;
2901*3f2dd94aSFrançois Tigeot
2902*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer;
2903*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
2904*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set;
2905*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
2906*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
2907*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.read = gen8_oa_read;
2908*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
2909*3f2dd94aSFrançois Tigeot
2910*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;
2911*3f2dd94aSFrançois Tigeot
2912*3f2dd94aSFrançois Tigeot if (IS_GEN8(dev_priv)) {
2913*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
2914*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
2915*3f2dd94aSFrançois Tigeot
2916*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.timestamp_frequency = 12500000;
2917*3f2dd94aSFrançois Tigeot
2918*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
2919*3f2dd94aSFrançois Tigeot if (IS_CHERRYVIEW(dev_priv)) {
2920*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ops.is_valid_mux_reg =
2921*3f2dd94aSFrançois Tigeot chv_is_valid_mux_addr;
2922*3f2dd94aSFrançois Tigeot }
2923*3f2dd94aSFrançois Tigeot } else if (IS_GEN9(dev_priv)) {
2924*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
2925*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
2926*3f2dd94aSFrançois Tigeot
2927*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
2928*3f2dd94aSFrançois Tigeot
2929*3f2dd94aSFrançois Tigeot switch (dev_priv->info.platform) {
2930*3f2dd94aSFrançois Tigeot case INTEL_BROXTON:
2931*3f2dd94aSFrançois Tigeot case INTEL_GEMINILAKE:
2932*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.timestamp_frequency = 19200000;
2933*3f2dd94aSFrançois Tigeot break;
2934*3f2dd94aSFrançois Tigeot case INTEL_SKYLAKE:
2935*3f2dd94aSFrançois Tigeot case INTEL_KABYLAKE:
2936*3f2dd94aSFrançois Tigeot case INTEL_COFFEELAKE:
2937*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.timestamp_frequency = 12000000;
2938*3f2dd94aSFrançois Tigeot break;
2939*3f2dd94aSFrançois Tigeot default:
2940*3f2dd94aSFrançois Tigeot /* Leave timestamp_frequency to 0 so we can
2941*3f2dd94aSFrançois Tigeot * detect unsupported platforms.
2942*3f2dd94aSFrançois Tigeot */
2943*3f2dd94aSFrançois Tigeot break;
2944*3f2dd94aSFrançois Tigeot }
2945*3f2dd94aSFrançois Tigeot }
2946*3f2dd94aSFrançois Tigeot }
2947*3f2dd94aSFrançois Tigeot
2948*3f2dd94aSFrançois Tigeot if (dev_priv->perf.oa.timestamp_frequency) {
2949a85cb24fSFrançois Tigeot hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
2950a85cb24fSFrançois Tigeot CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2951a85cb24fSFrançois Tigeot dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
2952a85cb24fSFrançois Tigeot init_waitqueue_head(&dev_priv->perf.oa.poll_wq);
2953a85cb24fSFrançois Tigeot
2954a85cb24fSFrançois Tigeot INIT_LIST_HEAD(&dev_priv->perf.streams);
2955a85cb24fSFrançois Tigeot mutex_init(&dev_priv->perf.lock);
2956*3f2dd94aSFrançois Tigeot spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
2957a85cb24fSFrançois Tigeot
2958*3f2dd94aSFrançois Tigeot oa_sample_rate_hard_limit =
2959*3f2dd94aSFrançois Tigeot dev_priv->perf.oa.timestamp_frequency / 2;
2960a85cb24fSFrançois Tigeot dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
2961*3f2dd94aSFrançois Tigeot
2962*3f2dd94aSFrançois Tigeot mutex_init(&dev_priv->perf.metrics_lock);
2963*3f2dd94aSFrançois Tigeot idr_init(&dev_priv->perf.metrics_idr);
2964a85cb24fSFrançois Tigeot
2965a85cb24fSFrançois Tigeot dev_priv->perf.initialized = true;
2966a85cb24fSFrançois Tigeot }
2967*3f2dd94aSFrançois Tigeot #endif
2968*3f2dd94aSFrançois Tigeot }
2969a85cb24fSFrançois Tigeot
2970a85cb24fSFrançois Tigeot /**
2971a85cb24fSFrançois Tigeot * i915_perf_fini - Counter part to i915_perf_init()
2972a85cb24fSFrançois Tigeot * @dev_priv: i915 device instance
2973a85cb24fSFrançois Tigeot */
i915_perf_fini(struct drm_i915_private * dev_priv)2974a85cb24fSFrançois Tigeot void i915_perf_fini(struct drm_i915_private *dev_priv)
2975a85cb24fSFrançois Tigeot {
2976a85cb24fSFrançois Tigeot if (!dev_priv->perf.initialized)
2977a85cb24fSFrançois Tigeot return;
2978a85cb24fSFrançois Tigeot
2979a85cb24fSFrançois Tigeot #if 0
2980a85cb24fSFrançois Tigeot unregister_sysctl_table(dev_priv->perf.sysctl_header);
2981a85cb24fSFrançois Tigeot
2982a85cb24fSFrançois Tigeot memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));
2983*3f2dd94aSFrançois Tigeot
2984a85cb24fSFrançois Tigeot #endif
2985a85cb24fSFrançois Tigeot dev_priv->perf.initialized = false;
2986a85cb24fSFrançois Tigeot }
2987