xref: /dflybsd-src/sys/dev/drm/i915/i915_perf.c (revision 3f2dd94a569761201b5b0a18b2f697f97fe1b9dc)
1a85cb24fSFrançois Tigeot /*
2a85cb24fSFrançois Tigeot  * Copyright © 2015-2016 Intel Corporation
3a85cb24fSFrançois Tigeot  *
4a85cb24fSFrançois Tigeot  * Permission is hereby granted, free of charge, to any person obtaining a
5a85cb24fSFrançois Tigeot  * copy of this software and associated documentation files (the "Software"),
6a85cb24fSFrançois Tigeot  * to deal in the Software without restriction, including without limitation
7a85cb24fSFrançois Tigeot  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8a85cb24fSFrançois Tigeot  * and/or sell copies of the Software, and to permit persons to whom the
9a85cb24fSFrançois Tigeot  * Software is furnished to do so, subject to the following conditions:
10a85cb24fSFrançois Tigeot  *
11a85cb24fSFrançois Tigeot  * The above copyright notice and this permission notice (including the next
12a85cb24fSFrançois Tigeot  * paragraph) shall be included in all copies or substantial portions of the
13a85cb24fSFrançois Tigeot  * Software.
14a85cb24fSFrançois Tigeot  *
15a85cb24fSFrançois Tigeot  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16a85cb24fSFrançois Tigeot  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17a85cb24fSFrançois Tigeot  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18a85cb24fSFrançois Tigeot  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19a85cb24fSFrançois Tigeot  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20a85cb24fSFrançois Tigeot  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21a85cb24fSFrançois Tigeot  * IN THE SOFTWARE.
22a85cb24fSFrançois Tigeot  *
23a85cb24fSFrançois Tigeot  * Authors:
24a85cb24fSFrançois Tigeot  *   Robert Bragg <robert@sixbynine.org>
25a85cb24fSFrançois Tigeot  */
26a85cb24fSFrançois Tigeot 
27a85cb24fSFrançois Tigeot 
28a85cb24fSFrançois Tigeot /**
29a85cb24fSFrançois Tigeot  * DOC: i915 Perf Overview
30a85cb24fSFrançois Tigeot  *
31a85cb24fSFrançois Tigeot  * Gen graphics supports a large number of performance counters that can help
32a85cb24fSFrançois Tigeot  * driver and application developers understand and optimize their use of the
33a85cb24fSFrançois Tigeot  * GPU.
34a85cb24fSFrançois Tigeot  *
35a85cb24fSFrançois Tigeot  * This i915 perf interface enables userspace to configure and open a file
36a85cb24fSFrançois Tigeot  * descriptor representing a stream of GPU metrics which can then be read() as
37a85cb24fSFrançois Tigeot  * a stream of sample records.
38a85cb24fSFrançois Tigeot  *
39a85cb24fSFrançois Tigeot  * The interface is particularly suited to exposing buffered metrics that are
40a85cb24fSFrançois Tigeot  * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
41a85cb24fSFrançois Tigeot  *
42a85cb24fSFrançois Tigeot  * Streams representing a single context are accessible to applications with a
43a85cb24fSFrançois Tigeot  * corresponding drm file descriptor, such that OpenGL can use the interface
44a85cb24fSFrançois Tigeot  * without special privileges. Access to system-wide metrics requires root
45a85cb24fSFrançois Tigeot  * privileges by default, unless changed via the dev.i915.perf_stream_paranoid
46a85cb24fSFrançois Tigeot  * sysctl option.
47a85cb24fSFrançois Tigeot  *
48a85cb24fSFrançois Tigeot  */
49a85cb24fSFrançois Tigeot 
50a85cb24fSFrançois Tigeot /**
51a85cb24fSFrançois Tigeot  * DOC: i915 Perf History and Comparison with Core Perf
52a85cb24fSFrançois Tigeot  *
53a85cb24fSFrançois Tigeot  * The interface was initially inspired by the core Perf infrastructure but
54a85cb24fSFrançois Tigeot  * some notable differences are:
55a85cb24fSFrançois Tigeot  *
56a85cb24fSFrançois Tigeot  * i915 perf file descriptors represent a "stream" instead of an "event"; where
57a85cb24fSFrançois Tigeot  * a perf event primarily corresponds to a single 64bit value, while a stream
58a85cb24fSFrançois Tigeot  * might sample sets of tightly-coupled counters, depending on the
59a85cb24fSFrançois Tigeot  * configuration.  For example the Gen OA unit isn't designed to support
60a85cb24fSFrançois Tigeot  * orthogonal configurations of individual counters; it's configured for a set
61a85cb24fSFrançois Tigeot  * of related counters. Samples for an i915 perf stream capturing OA metrics
62a85cb24fSFrançois Tigeot  * will include a set of counter values packed in a compact HW specific format.
63a85cb24fSFrançois Tigeot  * The OA unit supports a number of different packing formats which can be
64a85cb24fSFrançois Tigeot  * selected by the user opening the stream. Perf has support for grouping
65a85cb24fSFrançois Tigeot  * events, but each event in the group is configured, validated and
66a85cb24fSFrançois Tigeot  * authenticated individually with separate system calls.
67a85cb24fSFrançois Tigeot  *
68a85cb24fSFrançois Tigeot  * i915 perf stream configurations are provided as an array of u64 (key,value)
69a85cb24fSFrançois Tigeot  * pairs, instead of a fixed struct with multiple miscellaneous config members,
70a85cb24fSFrançois Tigeot  * interleaved with event-type specific members.
71a85cb24fSFrançois Tigeot  *
72a85cb24fSFrançois Tigeot  * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
73a85cb24fSFrançois Tigeot  * The supported metrics are being written to memory by the GPU unsynchronized
74a85cb24fSFrançois Tigeot  * with the CPU, using HW specific packing formats for counter sets. Sometimes
75a85cb24fSFrançois Tigeot  * the constraints on HW configuration require reports to be filtered before it
76a85cb24fSFrançois Tigeot  * would be acceptable to expose them to unprivileged applications - to hide
77a85cb24fSFrançois Tigeot  * the metrics of other processes/contexts. For these use cases a read() based
78a85cb24fSFrançois Tigeot  * interface is a good fit, and provides an opportunity to filter data as it
79a85cb24fSFrançois Tigeot  * gets copied from the GPU mapped buffers to userspace buffers.
80a85cb24fSFrançois Tigeot  *
81a85cb24fSFrançois Tigeot  *
82a85cb24fSFrançois Tigeot  * Issues hit with first prototype based on Core Perf
83a85cb24fSFrançois Tigeot  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
84a85cb24fSFrançois Tigeot  *
85a85cb24fSFrançois Tigeot  * The first prototype of this driver was based on the core perf
86a85cb24fSFrançois Tigeot  * infrastructure, and while we did make that mostly work, with some changes to
87a85cb24fSFrançois Tigeot  * perf, we found we were breaking or working around too many assumptions baked
88a85cb24fSFrançois Tigeot  * into perf's currently cpu centric design.
89a85cb24fSFrançois Tigeot  *
90a85cb24fSFrançois Tigeot  * In the end we didn't see a clear benefit to making perf's implementation and
91a85cb24fSFrançois Tigeot  * interface more complex by changing design assumptions while we knew we still
92a85cb24fSFrançois Tigeot  * wouldn't be able to use any existing perf based userspace tools.
93a85cb24fSFrançois Tigeot  *
94a85cb24fSFrançois Tigeot  * Also considering the Gen specific nature of the Observability hardware and
95a85cb24fSFrançois Tigeot  * how userspace will sometimes need to combine i915 perf OA metrics with
96a85cb24fSFrançois Tigeot  * side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're
97a85cb24fSFrançois Tigeot  * expecting the interface to be used by a platform specific userspace such as
98a85cb24fSFrançois Tigeot  * OpenGL or tools. This is to say; we aren't inherently missing out on having
99a85cb24fSFrançois Tigeot  * a standard vendor/architecture agnostic interface by not using perf.
100a85cb24fSFrançois Tigeot  *
101a85cb24fSFrançois Tigeot  *
102a85cb24fSFrançois Tigeot  * For posterity, in case we might re-visit trying to adapt core perf to be
103a85cb24fSFrançois Tigeot  * better suited to exposing i915 metrics these were the main pain points we
104a85cb24fSFrançois Tigeot  * hit:
105a85cb24fSFrançois Tigeot  *
106a85cb24fSFrançois Tigeot  * - The perf based OA PMU driver broke some significant design assumptions:
107a85cb24fSFrançois Tigeot  *
108a85cb24fSFrançois Tigeot  *   Existing perf pmus are used for profiling work on a cpu and we were
109a85cb24fSFrançois Tigeot  *   introducing the idea of _IS_DEVICE pmus with different security
110a85cb24fSFrançois Tigeot  *   implications, the need to fake cpu-related data (such as user/kernel
111a85cb24fSFrançois Tigeot  *   registers) to fit with perf's current design, and adding _DEVICE records
112a85cb24fSFrançois Tigeot  *   as a way to forward device-specific status records.
113a85cb24fSFrançois Tigeot  *
114a85cb24fSFrançois Tigeot  *   The OA unit writes reports of counters into a circular buffer, without
115a85cb24fSFrançois Tigeot  *   involvement from the CPU, making our PMU driver the first of a kind.
116a85cb24fSFrançois Tigeot  *
117a85cb24fSFrançois Tigeot  *   Given the way we were periodically forwarding data from the GPU-mapped, OA
118a85cb24fSFrançois Tigeot  *   buffer to perf's buffer, those bursts of sample writes looked to perf like
119a85cb24fSFrançois Tigeot  *   we were sampling too fast and so we had to subvert its throttling checks.
120a85cb24fSFrançois Tigeot  *
121a85cb24fSFrançois Tigeot  *   Perf supports groups of counters and allows those to be read via
122a85cb24fSFrançois Tigeot  *   transactions internally but transactions currently seem designed to be
123a85cb24fSFrançois Tigeot  *   explicitly initiated from the cpu (say in response to a userspace read())
124a85cb24fSFrançois Tigeot  *   and while we could pull a report out of the OA buffer we can't
125a85cb24fSFrançois Tigeot  *   trigger a report from the cpu on demand.
126a85cb24fSFrançois Tigeot  *
127a85cb24fSFrançois Tigeot  *   Related to being report based; the OA counters are configured in HW as a
128a85cb24fSFrançois Tigeot  *   set while perf generally expects counter configurations to be orthogonal.
129a85cb24fSFrançois Tigeot  *   Although counters can be associated with a group leader as they are
130a85cb24fSFrançois Tigeot  *   opened, there's no clear precedent for being able to provide group-wide
131a85cb24fSFrançois Tigeot  *   configuration attributes (for example we want to let userspace choose the
132a85cb24fSFrançois Tigeot  *   OA unit report format used to capture all counters in a set, or specify a
133a85cb24fSFrançois Tigeot  *   GPU context to filter metrics on). We avoided using perf's grouping
134a85cb24fSFrançois Tigeot  *   feature and forwarded OA reports to userspace via perf's 'raw' sample
135a85cb24fSFrançois Tigeot  *   field. This suited our userspace well considering how coupled the counters
136a85cb24fSFrançois Tigeot  *   are when dealing with normalizing. It would be inconvenient to split
137a85cb24fSFrançois Tigeot  *   counters up into separate events, only to require userspace to recombine
138a85cb24fSFrançois Tigeot  *   them. For Mesa it's also convenient to be forwarded raw, periodic reports
139a85cb24fSFrançois Tigeot  *   for combining with the side-band raw reports it captures using
140a85cb24fSFrançois Tigeot  *   MI_REPORT_PERF_COUNT commands.
141a85cb24fSFrançois Tigeot  *
142a85cb24fSFrançois Tigeot  *   - As a side note on perf's grouping feature; there was also some concern
143a85cb24fSFrançois Tigeot  *     that using PERF_FORMAT_GROUP as a way to pack together counter values
144a85cb24fSFrançois Tigeot  *     would quite drastically inflate our sample sizes, which would likely
145a85cb24fSFrançois Tigeot  *     lower the effective sampling resolutions we could use when the available
146a85cb24fSFrançois Tigeot  *     memory bandwidth is limited.
147a85cb24fSFrançois Tigeot  *
148a85cb24fSFrançois Tigeot  *     With the OA unit's report formats, counters are packed together as 32
149a85cb24fSFrançois Tigeot  *     or 40bit values, with the largest report size being 256 bytes.
150a85cb24fSFrançois Tigeot  *
151a85cb24fSFrançois Tigeot  *     PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a
152a85cb24fSFrançois Tigeot  *     documented ordering to the values, implying PERF_FORMAT_ID must also be
153a85cb24fSFrançois Tigeot  *     used to add a 64bit ID before each value; giving 16 bytes per counter.
154a85cb24fSFrançois Tigeot  *
155a85cb24fSFrançois Tigeot  *   Related to counter orthogonality; we can't time share the OA unit, while
156a85cb24fSFrançois Tigeot  *   event scheduling is a central design idea within perf for allowing
157a85cb24fSFrançois Tigeot  *   userspace to open + enable more events than can be configured in HW at any
158a85cb24fSFrançois Tigeot  *   one time.  The OA unit is not designed to allow re-configuration while in
159a85cb24fSFrançois Tigeot  *   use. We can't reconfigure the OA unit without losing internal OA unit
160a85cb24fSFrançois Tigeot  *   state which we can't access explicitly to save and restore. Reconfiguring
161a85cb24fSFrançois Tigeot  *   the OA unit is also relatively slow, involving ~100 register writes. From
162a85cb24fSFrançois Tigeot  *   userspace Mesa also depends on a stable OA configuration when emitting
163a85cb24fSFrançois Tigeot  *   MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be
164a85cb24fSFrançois Tigeot  *   disabled while there are outstanding MI_RPC commands lest we hang the
165a85cb24fSFrançois Tigeot  *   command streamer.
166a85cb24fSFrançois Tigeot  *
167a85cb24fSFrançois Tigeot  *   The contents of sample records aren't extensible by device drivers (i.e.
168a85cb24fSFrançois Tigeot  *   the sample_type bits). As an example; Sourab Gupta had been looking to
169a85cb24fSFrançois Tigeot  *   attach GPU timestamps to our OA samples. We were shoehorning OA reports
170a85cb24fSFrançois Tigeot  *   into sample records by using the 'raw' field, but it's tricky to pack more
171a85cb24fSFrançois Tigeot  *   than one thing into this field because events/core.c currently only lets a
172a85cb24fSFrançois Tigeot  *   pmu give a single raw data pointer plus len which will be copied into the
173a85cb24fSFrançois Tigeot  *   ring buffer. To include more than the OA report we'd have to copy the
174a85cb24fSFrançois Tigeot  *   report into an intermediate larger buffer. I'd been considering allowing a
175a85cb24fSFrançois Tigeot  *   vector of data+len values to be specified for copying the raw data, but
176a85cb24fSFrançois Tigeot  *   it felt like a kludge to be using the raw field for this purpose.
177a85cb24fSFrançois Tigeot  *
178a85cb24fSFrançois Tigeot  * - It felt like our perf based PMU was making some technical compromises
179a85cb24fSFrançois Tigeot  *   just for the sake of using perf:
180a85cb24fSFrançois Tigeot  *
181a85cb24fSFrançois Tigeot  *   perf_event_open() requires events to either relate to a pid or a specific
182a85cb24fSFrançois Tigeot  *   cpu core, while our device pmu related to neither.  Events opened with a
183a85cb24fSFrançois Tigeot  *   pid will be automatically enabled/disabled according to the scheduling of
184a85cb24fSFrançois Tigeot  *   that process - so not appropriate for us. When an event is related to a
185a85cb24fSFrançois Tigeot  *   cpu id, perf ensures pmu methods will be invoked via an inter-processor
186a85cb24fSFrançois Tigeot  *   interrupt on that core. To avoid invasive changes our userspace opened OA
187a85cb24fSFrançois Tigeot  *   perf events for a specific cpu. This was workable but it meant the
188a85cb24fSFrançois Tigeot  *   majority of the OA driver ran in atomic context, including all OA report
189a85cb24fSFrançois Tigeot  *   forwarding, which wasn't really necessary in our case and seems to make
190a85cb24fSFrançois Tigeot  *   our locking requirements somewhat complex as we handled the interaction
191a85cb24fSFrançois Tigeot  *   with the rest of the i915 driver.
192a85cb24fSFrançois Tigeot  */
193a85cb24fSFrançois Tigeot 
194a85cb24fSFrançois Tigeot #include <linux/anon_inodes.h>
195a85cb24fSFrançois Tigeot #include <linux/sizes.h>
196*3f2dd94aSFrançois Tigeot #include <linux/uuid.h>
197a85cb24fSFrançois Tigeot 
198a85cb24fSFrançois Tigeot #include "i915_drv.h"
199a85cb24fSFrançois Tigeot #include "i915_oa_hsw.h"
200*3f2dd94aSFrançois Tigeot #include "i915_oa_bdw.h"
201*3f2dd94aSFrançois Tigeot #include "i915_oa_chv.h"
202*3f2dd94aSFrançois Tigeot #include "i915_oa_sklgt2.h"
203*3f2dd94aSFrançois Tigeot #include "i915_oa_sklgt3.h"
204*3f2dd94aSFrançois Tigeot #include "i915_oa_sklgt4.h"
205*3f2dd94aSFrançois Tigeot #include "i915_oa_bxt.h"
206*3f2dd94aSFrançois Tigeot #include "i915_oa_kblgt2.h"
207*3f2dd94aSFrançois Tigeot #include "i915_oa_kblgt3.h"
208*3f2dd94aSFrançois Tigeot #include "i915_oa_glk.h"
209*3f2dd94aSFrançois Tigeot #include "i915_oa_cflgt2.h"
210a85cb24fSFrançois Tigeot 
211a85cb24fSFrançois Tigeot /* HW requires this to be a power of two, between 128k and 16M, though driver
212a85cb24fSFrançois Tigeot  * is currently generally designed assuming the largest 16M size is used such
213a85cb24fSFrançois Tigeot  * that the overflow cases are unlikely in normal operation.
214a85cb24fSFrançois Tigeot  */
#define OA_BUFFER_SIZE		SZ_16M

/*
 * Number of bytes between @head and @tail in the circular OA buffer, wrapped
 * to the buffer size (the mask relies on OA_BUFFER_SIZE being a power of two).
 *
 * Both arguments are parenthesized so that compound expressions such as
 * OA_TAKEN(tail, head + n) expand with the intended precedence.
 */
#define OA_TAKEN(tail, head)	(((tail) - (head)) & (OA_BUFFER_SIZE - 1))
218a85cb24fSFrançois Tigeot 
219a85cb24fSFrançois Tigeot /* There's a HW race condition between OA unit tail pointer register updates and
220a85cb24fSFrançois Tigeot  * writes to memory whereby the tail pointer can sometimes get ahead of what's
221a85cb24fSFrançois Tigeot  * been written out to the OA buffer so far.
222a85cb24fSFrançois Tigeot  *
223a85cb24fSFrançois Tigeot  * Although this can be observed explicitly by checking for a zeroed report-id
224a85cb24fSFrançois Tigeot  * field in tail reports, it seems preferable to account for this earlier e.g.
225a85cb24fSFrançois Tigeot  * as part of the _oa_buffer_is_empty checks to minimize -EAGAIN polling cycles
226a85cb24fSFrançois Tigeot  * in this situation.
227a85cb24fSFrançois Tigeot  *
228a85cb24fSFrançois Tigeot  * To give time for the most recent reports to land before they may be copied to
229a85cb24fSFrançois Tigeot  * userspace, the driver operates as if the tail pointer effectively lags behind
230a85cb24fSFrançois Tigeot  * the HW tail pointer by 'tail_margin' bytes. The margin in bytes is calculated
231a85cb24fSFrançois Tigeot  * based on this constant in nanoseconds, the current OA sampling exponent
232a85cb24fSFrançois Tigeot  * and current report size.
233a85cb24fSFrançois Tigeot  *
234a85cb24fSFrançois Tigeot  * There is also a fallback check while reading to simply skip over reports with
235a85cb24fSFrançois Tigeot  * a zeroed report-id.
236a85cb24fSFrançois Tigeot  */
237a85cb24fSFrançois Tigeot #define OA_TAIL_MARGIN_NSEC	100000ULL	/* 100 microseconds, expressed in nanoseconds */
238a85cb24fSFrançois Tigeot 
239a85cb24fSFrançois Tigeot /* frequency for checking whether the OA unit has written new reports to the
240a85cb24fSFrançois Tigeot  * circular OA buffer...
241a85cb24fSFrançois Tigeot  */
242a85cb24fSFrançois Tigeot #define POLL_FREQUENCY 200	/* OA buffer checks per second */
243a85cb24fSFrançois Tigeot #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)	/* interval between checks; 5ms assuming NSEC_PER_SEC is 10^9 */
244a85cb24fSFrançois Tigeot 
245a85cb24fSFrançois Tigeot #if 0
246a85cb24fSFrançois Tigeot /* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
247a85cb24fSFrançois Tigeot static int zero;	/* zero-initialized; NOTE(review): presumably the sysctl's lower bound -- confirm where it is registered */
248a85cb24fSFrançois Tigeot static int one = 1;	/* NOTE(review): presumably the sysctl's upper bound -- confirm where it is registered */
249a85cb24fSFrançois Tigeot static u32 i915_perf_stream_paranoid = true;	/* defaults to 1; the overview says system-wide metrics need root by default */
250a85cb24fSFrançois Tigeot 
251a85cb24fSFrançois Tigeot /* The maximum exponent the hardware accepts is 63 (essentially it selects one
252a85cb24fSFrançois Tigeot  * of the 64bit timestamp bits to trigger reports from) but there's currently
253a85cb24fSFrançois Tigeot  * no known use case for sampling as infrequently as once per 47 thousand years.
254a85cb24fSFrançois Tigeot  *
255a85cb24fSFrançois Tigeot  * Since the timestamps included in OA reports are only 32bits it seems
256a85cb24fSFrançois Tigeot  * reasonable to limit the OA exponent where it's still possible to account for
257a85cb24fSFrançois Tigeot  * overflow in OA report timestamps.
258a85cb24fSFrançois Tigeot  */
259a85cb24fSFrançois Tigeot #define OA_EXPONENT_MAX 31	/* deliberately below the HW maximum of 63, see the rationale above */
260a85cb24fSFrançois Tigeot 
261a85cb24fSFrançois Tigeot #define INVALID_CTX_ID 0xffffffff	/* all-ones 32bit value; NOTE(review): presumably a "no context" sentinel -- confirm against its users */
262a85cb24fSFrançois Tigeot 
263*3f2dd94aSFrançois Tigeot /* On Gen8+ automatically triggered OA reports include a 'reason' field... */
264*3f2dd94aSFrançois Tigeot #define OAREPORT_REASON_MASK           0x3f	/* the reason field is 6 bits wide */
265*3f2dd94aSFrançois Tigeot #define OAREPORT_REASON_SHIFT          19	/* NOTE(review): presumably the bit offset of the field within a report dword -- confirm at the use site */
/* NOTE(review): the values below look like individual bits of the reason field (after shift and mask); confirm at the use site. */
266*3f2dd94aSFrançois Tigeot #define OAREPORT_REASON_TIMER          (1<<0)
267*3f2dd94aSFrançois Tigeot #define OAREPORT_REASON_CTX_SWITCH     (1<<3)
268*3f2dd94aSFrançois Tigeot #define OAREPORT_REASON_CLK_RATIO      (1<<5)
269*3f2dd94aSFrançois Tigeot 
270a85cb24fSFrançois Tigeot 
271a85cb24fSFrançois Tigeot /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
272a85cb24fSFrançois Tigeot  *
273*3f2dd94aSFrançois Tigeot  * The highest sampling frequency we can theoretically program the OA unit
274*3f2dd94aSFrançois Tigeot  * with is always half the timestamp frequency: E.g. 6.25MHz for Haswell.
275*3f2dd94aSFrançois Tigeot  *
276*3f2dd94aSFrançois Tigeot  * Initialized just before we register the sysctl parameter.
277a85cb24fSFrançois Tigeot  */
278*3f2dd94aSFrançois Tigeot static int oa_sample_rate_hard_limit;	/* starts at 0 until initialized; NOTE(review): presumably in Hz like i915_oa_max_sample_rate -- confirm */
279a85cb24fSFrançois Tigeot 
280a85cb24fSFrançois Tigeot /* Theoretically we can program the OA unit to sample every 160ns but don't
281a85cb24fSFrançois Tigeot  * allow that by default unless root...
282a85cb24fSFrançois Tigeot  *
283a85cb24fSFrançois Tigeot  * The default threshold of 100000Hz is based on perf's similar
284a85cb24fSFrançois Tigeot  * kernel.perf_event_max_sample_rate sysctl parameter.
285a85cb24fSFrançois Tigeot  */
286a85cb24fSFrançois Tigeot static u32 i915_oa_max_sample_rate = 100000;	/* default threshold, in Hz */
287a85cb24fSFrançois Tigeot 
288a85cb24fSFrançois Tigeot /* XXX: beware if future OA HW adds new report formats that the current
289a85cb24fSFrançois Tigeot  * code assumes all reports have a power-of-two size and ~(size - 1) can
290a85cb24fSFrançois Tigeot  * be used as a mask to align the OA tail pointer.
291a85cb24fSFrançois Tigeot  */
/*
 * OA report formats for hsw, indexed by I915_OA_FORMAT_*. The second value of
 * each entry is the report size in bytes (64, 128 or 256 here, all powers of
 * two). NOTE(review): the first value is presumably the HW format selector --
 * confirm against struct i915_oa_format. Indices without an initializer stay
 * zeroed.
 */
292a85cb24fSFrançois Tigeot static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
293a85cb24fSFrançois Tigeot 	[I915_OA_FORMAT_A13]	    = { 0, 64 },
294a85cb24fSFrançois Tigeot 	[I915_OA_FORMAT_A29]	    = { 1, 128 },
295a85cb24fSFrançois Tigeot 	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
296a85cb24fSFrançois Tigeot 	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
297a85cb24fSFrançois Tigeot 	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
298a85cb24fSFrançois Tigeot 	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
299a85cb24fSFrançois Tigeot 	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
300a85cb24fSFrançois Tigeot 	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
301a85cb24fSFrançois Tigeot };
302*3f2dd94aSFrançois Tigeot 
/*
 * OA report formats for gen8 and later, indexed by I915_OA_FORMAT_*. Entries
 * use the same two-value layout as hsw_oa_formats; the second value is 64, 128
 * or 256 (a power of two in every entry). Indices without an initializer stay
 * zeroed.
 */
303*3f2dd94aSFrançois Tigeot static struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
304*3f2dd94aSFrançois Tigeot 	[I915_OA_FORMAT_A12]		    = { 0, 64 },
305*3f2dd94aSFrançois Tigeot 	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
306*3f2dd94aSFrançois Tigeot 	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
307*3f2dd94aSFrançois Tigeot 	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
308*3f2dd94aSFrançois Tigeot };
309a85cb24fSFrançois Tigeot #endif
310a85cb24fSFrançois Tigeot 
311a85cb24fSFrançois Tigeot #define SAMPLE_OA_REPORT      (1<<0)	/* sample_flags bit: append_oa_sample() copies the raw OA report into each sample when set */
312a85cb24fSFrançois Tigeot 
313a85cb24fSFrançois Tigeot /**
314a85cb24fSFrançois Tigeot  * struct perf_open_properties - for validated properties given to open a stream
315a85cb24fSFrançois Tigeot  * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
316a85cb24fSFrançois Tigeot  * @single_context: Whether a single or all gpu contexts should be monitored
317a85cb24fSFrançois Tigeot  * @ctx_handle: A gem ctx handle for use with @single_context
318a85cb24fSFrançois Tigeot  * @metrics_set: An ID for an OA unit metric set advertised via sysfs
319a85cb24fSFrançois Tigeot  * @oa_format: An OA unit HW report format
320a85cb24fSFrançois Tigeot  * @oa_periodic: Whether to enable periodic OA unit sampling
321a85cb24fSFrançois Tigeot  * @oa_period_exponent: The OA unit sampling period is derived from this
322a85cb24fSFrançois Tigeot  *
323a85cb24fSFrançois Tigeot  * As read_properties_unlocked() enumerates and validates the properties given
324a85cb24fSFrançois Tigeot  * to open a stream of metrics the configuration is built up in the structure
325a85cb24fSFrançois Tigeot  * which starts out zero initialized.
326a85cb24fSFrançois Tigeot  */
327a85cb24fSFrançois Tigeot struct perf_open_properties {
328a85cb24fSFrançois Tigeot 	u32 sample_flags;	/* bitmask; NOTE(review): presumably SAMPLE_* bits such as SAMPLE_OA_REPORT -- confirm in the property parser */
329a85cb24fSFrançois Tigeot 
330a85cb24fSFrançois Tigeot 	u64 single_context:1;	/* 1-bit flag: monitor a single context rather than all of them */
331a85cb24fSFrançois Tigeot 	u64 ctx_handle;	/* gem context handle, used together with single_context */
332a85cb24fSFrançois Tigeot 
333a85cb24fSFrançois Tigeot 	/* OA sampling state */
334a85cb24fSFrançois Tigeot 	int metrics_set;	/* ID of the OA metric set */
335a85cb24fSFrançois Tigeot 	int oa_format;	/* NOTE(review): presumably an I915_OA_FORMAT_* index -- confirm in the property parser */
336a85cb24fSFrançois Tigeot 	bool oa_periodic;	/* true to enable periodic OA sampling */
337a85cb24fSFrançois Tigeot 	int oa_period_exponent;	/* NOTE(review): presumably bounded by OA_EXPONENT_MAX -- confirm in the property parser */
338a85cb24fSFrançois Tigeot };
339a85cb24fSFrançois Tigeot 
340a85cb24fSFrançois Tigeot #if 0
341a85cb24fSFrançois Tigeot /* NB: This is either called via fops or the poll check hrtimer (atomic ctx)
342a85cb24fSFrançois Tigeot  *
343a85cb24fSFrançois Tigeot  * It's safe to read OA config state here unlocked, assuming that this is only
344a85cb24fSFrançois Tigeot  * called while the stream is enabled, while the global OA configuration can't
345a85cb24fSFrançois Tigeot  * be modified.
346a85cb24fSFrançois Tigeot  *
347a85cb24fSFrançois Tigeot  * Note: we don't lock around the head/tail reads even though there's the slim
348a85cb24fSFrançois Tigeot  * possibility of read() fop errors forcing a re-init of the OA buffer
349a85cb24fSFrançois Tigeot  * pointers.  A race here could result in a false positive !empty status which
350a85cb24fSFrançois Tigeot  * is acceptable.
351a85cb24fSFrançois Tigeot  */
352a85cb24fSFrançois Tigeot static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv)
353a85cb24fSFrançois Tigeot {
354a85cb24fSFrançois Tigeot 	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
355a85cb24fSFrançois Tigeot 	u32 oastatus2 = I915_READ(GEN7_OASTATUS2);
356a85cb24fSFrançois Tigeot 	u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
357a85cb24fSFrançois Tigeot 	u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
358a85cb24fSFrançois Tigeot 	u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
359a85cb24fSFrançois Tigeot 
360a85cb24fSFrançois Tigeot 	return OA_TAKEN(tail, head) <
361a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.tail_margin + report_size;
362a85cb24fSFrançois Tigeot }
363a85cb24fSFrançois Tigeot 
364a85cb24fSFrançois Tigeot /**
365a85cb24fSFrançois Tigeot  * append_oa_status - Appends a status record to a userspace read() buffer.
366a85cb24fSFrançois Tigeot  * @stream: An i915-perf stream opened for OA metrics
367a85cb24fSFrançois Tigeot  * @buf: destination buffer given by userspace
368a85cb24fSFrançois Tigeot  * @count: the number of bytes userspace wants to read
369a85cb24fSFrançois Tigeot  * @offset: (inout): the current position for writing into @buf
370a85cb24fSFrançois Tigeot  * @type: The kind of status to report to userspace
371a85cb24fSFrançois Tigeot  *
372a85cb24fSFrançois Tigeot  * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
373a85cb24fSFrançois Tigeot  * into the userspace read() buffer.
374a85cb24fSFrançois Tigeot  *
375a85cb24fSFrançois Tigeot  * The @buf @offset will only be updated on success.
376a85cb24fSFrançois Tigeot  *
377a85cb24fSFrançois Tigeot  * Returns: 0 on success, negative error code on failure.
378a85cb24fSFrançois Tigeot  */
379a85cb24fSFrançois Tigeot static int append_oa_status(struct i915_perf_stream *stream,
380a85cb24fSFrançois Tigeot 			    char __user *buf,
381a85cb24fSFrançois Tigeot 			    size_t count,
382a85cb24fSFrançois Tigeot 			    size_t *offset,
383a85cb24fSFrançois Tigeot 			    enum drm_i915_perf_record_type type)
384a85cb24fSFrançois Tigeot {
385a85cb24fSFrançois Tigeot 	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };
386a85cb24fSFrançois Tigeot 
387a85cb24fSFrançois Tigeot 	if ((count - *offset) < header.size)
388a85cb24fSFrançois Tigeot 		return -ENOSPC;
389a85cb24fSFrançois Tigeot 
390a85cb24fSFrançois Tigeot 	if (copy_to_user(buf + *offset, &header, sizeof(header)))
391a85cb24fSFrançois Tigeot 		return -EFAULT;
392a85cb24fSFrançois Tigeot 
393a85cb24fSFrançois Tigeot 	(*offset) += header.size;
394a85cb24fSFrançois Tigeot 
395a85cb24fSFrançois Tigeot 	return 0;
396a85cb24fSFrançois Tigeot }
397a85cb24fSFrançois Tigeot 
398a85cb24fSFrançois Tigeot /**
399a85cb24fSFrançois Tigeot  * append_oa_sample - Copies single OA report into userspace read() buffer.
400a85cb24fSFrançois Tigeot  * @stream: An i915-perf stream opened for OA metrics
401a85cb24fSFrançois Tigeot  * @buf: destination buffer given by userspace
402a85cb24fSFrançois Tigeot  * @count: the number of bytes userspace wants to read
403a85cb24fSFrançois Tigeot  * @offset: (inout): the current position for writing into @buf
404a85cb24fSFrançois Tigeot  * @report: A single OA report to (optionally) include as part of the sample
405a85cb24fSFrançois Tigeot  *
406a85cb24fSFrançois Tigeot  * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
407a85cb24fSFrançois Tigeot  * properties when opening a stream, tracked as `stream->sample_flags`. This
408a85cb24fSFrançois Tigeot  * function copies the requested components of a single sample to the given
409a85cb24fSFrançois Tigeot  * read() @buf.
410a85cb24fSFrançois Tigeot  *
411a85cb24fSFrançois Tigeot  * The @buf @offset will only be updated on success.
412a85cb24fSFrançois Tigeot  *
413a85cb24fSFrançois Tigeot  * Returns: 0 on success, negative error code on failure.
414a85cb24fSFrançois Tigeot  */
415a85cb24fSFrançois Tigeot static int append_oa_sample(struct i915_perf_stream *stream,
416a85cb24fSFrançois Tigeot 			    char __user *buf,
417a85cb24fSFrançois Tigeot 			    size_t count,
418a85cb24fSFrançois Tigeot 			    size_t *offset,
419a85cb24fSFrançois Tigeot 			    const u8 *report)
420a85cb24fSFrançois Tigeot {
421a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
422a85cb24fSFrançois Tigeot 	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
423a85cb24fSFrançois Tigeot 	struct drm_i915_perf_record_header header;
424a85cb24fSFrançois Tigeot 	u32 sample_flags = stream->sample_flags;
425a85cb24fSFrançois Tigeot 
426a85cb24fSFrançois Tigeot 	header.type = DRM_I915_PERF_RECORD_SAMPLE;
427a85cb24fSFrançois Tigeot 	header.pad = 0;
428a85cb24fSFrançois Tigeot 	header.size = stream->sample_size;
429a85cb24fSFrançois Tigeot 
430a85cb24fSFrançois Tigeot 	if ((count - *offset) < header.size)
431a85cb24fSFrançois Tigeot 		return -ENOSPC;
432a85cb24fSFrançois Tigeot 
433a85cb24fSFrançois Tigeot 	buf += *offset;
434a85cb24fSFrançois Tigeot 	if (copy_to_user(buf, &header, sizeof(header)))
435a85cb24fSFrançois Tigeot 		return -EFAULT;
436a85cb24fSFrançois Tigeot 	buf += sizeof(header);
437a85cb24fSFrançois Tigeot 
438a85cb24fSFrançois Tigeot 	if (sample_flags & SAMPLE_OA_REPORT) {
439a85cb24fSFrançois Tigeot 		if (copy_to_user(buf, report, report_size))
440a85cb24fSFrançois Tigeot 			return -EFAULT;
441a85cb24fSFrançois Tigeot 	}
442a85cb24fSFrançois Tigeot 
443a85cb24fSFrançois Tigeot 	(*offset) += header.size;
444a85cb24fSFrançois Tigeot 
445a85cb24fSFrançois Tigeot 	return 0;
446a85cb24fSFrançois Tigeot }
447a85cb24fSFrançois Tigeot 
448a85cb24fSFrançois Tigeot /**
449a85cb24fSFrançois Tigeot  * gen8_append_oa_reports - Copies all buffered OA reports into userspace read() buffer.
450a85cb24fSFrançois Tigeot  * @stream: An i915-perf stream opened for OA metrics
451a85cb24fSFrançois Tigeot  * @buf: destination buffer given by userspace
452a85cb24fSFrançois Tigeot  * @count: the number of bytes userspace wants to read
453a85cb24fSFrançois Tigeot  * @offset: (inout): the current position for writing into @buf
454*3f2dd94aSFrançois Tigeot  *
455*3f2dd94aSFrançois Tigeot  * Notably any error condition resulting in a short read (-%ENOSPC or
456*3f2dd94aSFrançois Tigeot  * -%EFAULT) will be returned even though one or more records may
457*3f2dd94aSFrançois Tigeot  * have been successfully copied. In this case it's up to the caller
458*3f2dd94aSFrançois Tigeot  * to decide if the error should be squashed before returning to
459*3f2dd94aSFrançois Tigeot  * userspace.
460*3f2dd94aSFrançois Tigeot  *
461*3f2dd94aSFrançois Tigeot  * Note: reports are consumed from the head, and appended to the
462*3f2dd94aSFrançois Tigeot  * tail, so the tail chases the head?... If you think that's mad
463*3f2dd94aSFrançois Tigeot  * and back-to-front you're not alone, but this follows the
464*3f2dd94aSFrançois Tigeot  * Gen PRM naming convention.
465*3f2dd94aSFrançois Tigeot  *
466*3f2dd94aSFrançois Tigeot  * Returns: 0 on success, negative error code on failure.
467*3f2dd94aSFrançois Tigeot  */
468*3f2dd94aSFrançois Tigeot static int gen8_append_oa_reports(struct i915_perf_stream *stream,
469*3f2dd94aSFrançois Tigeot 				  char __user *buf,
470*3f2dd94aSFrançois Tigeot 				  size_t count,
471*3f2dd94aSFrançois Tigeot 				  size_t *offset)
472*3f2dd94aSFrançois Tigeot {
473*3f2dd94aSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
474*3f2dd94aSFrançois Tigeot 	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
475*3f2dd94aSFrançois Tigeot 	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
476*3f2dd94aSFrançois Tigeot 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
477*3f2dd94aSFrançois Tigeot 	u32 mask = (OA_BUFFER_SIZE - 1);
478*3f2dd94aSFrançois Tigeot 	size_t start_offset = *offset;
479*3f2dd94aSFrançois Tigeot 	unsigned long flags;
480*3f2dd94aSFrançois Tigeot 	unsigned int aged_tail_idx;
481*3f2dd94aSFrançois Tigeot 	u32 head, tail;
482*3f2dd94aSFrançois Tigeot 	u32 taken;
483*3f2dd94aSFrançois Tigeot 	int ret = 0;
484*3f2dd94aSFrançois Tigeot 
485*3f2dd94aSFrançois Tigeot 	if (WARN_ON(!stream->enabled))
486*3f2dd94aSFrançois Tigeot 		return -EIO;
487*3f2dd94aSFrançois Tigeot 
488*3f2dd94aSFrançois Tigeot 	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
489*3f2dd94aSFrançois Tigeot 
490*3f2dd94aSFrançois Tigeot 	head = dev_priv->perf.oa.oa_buffer.head;
491*3f2dd94aSFrançois Tigeot 	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
492*3f2dd94aSFrançois Tigeot 	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
493*3f2dd94aSFrançois Tigeot 
494*3f2dd94aSFrançois Tigeot 	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
495*3f2dd94aSFrançois Tigeot 
496*3f2dd94aSFrançois Tigeot 	/*
497*3f2dd94aSFrançois Tigeot 	 * An invalid tail pointer here means we're still waiting for the poll
498*3f2dd94aSFrançois Tigeot 	 * hrtimer callback to give us a pointer
499*3f2dd94aSFrançois Tigeot 	 */
500*3f2dd94aSFrançois Tigeot 	if (tail == INVALID_TAIL_PTR)
501*3f2dd94aSFrançois Tigeot 		return -EAGAIN;
502*3f2dd94aSFrançois Tigeot 
503*3f2dd94aSFrançois Tigeot 	/*
504*3f2dd94aSFrançois Tigeot 	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
505*3f2dd94aSFrançois Tigeot 	 * while indexing relative to oa_buf_base.
506*3f2dd94aSFrançois Tigeot 	 */
507*3f2dd94aSFrançois Tigeot 	head -= gtt_offset;
508*3f2dd94aSFrançois Tigeot 	tail -= gtt_offset;
509*3f2dd94aSFrançois Tigeot 
510*3f2dd94aSFrançois Tigeot 	/*
511*3f2dd94aSFrançois Tigeot 	 * An out of bounds or misaligned head or tail pointer implies a driver
512*3f2dd94aSFrançois Tigeot 	 * bug since we validate + align the tail pointers we read from the
513*3f2dd94aSFrançois Tigeot 	 * hardware and we are in full control of the head pointer which should
514*3f2dd94aSFrançois Tigeot 	 * only be incremented by multiples of the report size (notably also
515*3f2dd94aSFrançois Tigeot 	 * all a power of two).
516*3f2dd94aSFrançois Tigeot 	 */
517*3f2dd94aSFrançois Tigeot 	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
518*3f2dd94aSFrançois Tigeot 		      tail > OA_BUFFER_SIZE || tail % report_size,
519*3f2dd94aSFrançois Tigeot 		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
520*3f2dd94aSFrançois Tigeot 		      head, tail))
521*3f2dd94aSFrançois Tigeot 		return -EIO;
522*3f2dd94aSFrançois Tigeot 
523*3f2dd94aSFrançois Tigeot 
524*3f2dd94aSFrançois Tigeot 	for (/* none */;
525*3f2dd94aSFrançois Tigeot 	     (taken = OA_TAKEN(tail, head));
526*3f2dd94aSFrançois Tigeot 	     head = (head + report_size) & mask) {
527*3f2dd94aSFrançois Tigeot 		u8 *report = oa_buf_base + head;
528*3f2dd94aSFrançois Tigeot 		u32 *report32 = (void *)report;
529*3f2dd94aSFrançois Tigeot 		u32 ctx_id;
530*3f2dd94aSFrançois Tigeot 		u32 reason;
531*3f2dd94aSFrançois Tigeot 
532*3f2dd94aSFrançois Tigeot 		/*
533*3f2dd94aSFrançois Tigeot 		 * All the report sizes factor neatly into the buffer
534*3f2dd94aSFrançois Tigeot 		 * size so we never expect to see a report split
535*3f2dd94aSFrançois Tigeot 		 * between the beginning and end of the buffer.
536*3f2dd94aSFrançois Tigeot 		 *
537*3f2dd94aSFrançois Tigeot 		 * Given the initial alignment check a misalignment
538*3f2dd94aSFrançois Tigeot 		 * here would imply a driver bug that would result
539*3f2dd94aSFrançois Tigeot 		 * in an overrun.
540*3f2dd94aSFrançois Tigeot 		 */
541*3f2dd94aSFrançois Tigeot 		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
542*3f2dd94aSFrançois Tigeot 			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
543*3f2dd94aSFrançois Tigeot 			break;
544*3f2dd94aSFrançois Tigeot 		}
545*3f2dd94aSFrançois Tigeot 
546*3f2dd94aSFrançois Tigeot 		/*
547*3f2dd94aSFrançois Tigeot 		 * The reason field includes flags identifying what
548*3f2dd94aSFrançois Tigeot 		 * triggered this specific report (mostly timer
549*3f2dd94aSFrançois Tigeot 		 * triggered or e.g. due to a context switch).
550*3f2dd94aSFrançois Tigeot 		 *
551*3f2dd94aSFrançois Tigeot 		 * This field is never expected to be zero so we can
552*3f2dd94aSFrançois Tigeot 		 * check that the report isn't invalid before copying
553*3f2dd94aSFrançois Tigeot 		 * it to userspace...
554*3f2dd94aSFrançois Tigeot 		 */
555*3f2dd94aSFrançois Tigeot 		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
556*3f2dd94aSFrançois Tigeot 			  OAREPORT_REASON_MASK);
557*3f2dd94aSFrançois Tigeot 		if (reason == 0) {
558*3f2dd94aSFrançois Tigeot 			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
559*3f2dd94aSFrançois Tigeot 				DRM_NOTE("Skipping spurious, invalid OA report\n");
560*3f2dd94aSFrançois Tigeot 			continue;
561*3f2dd94aSFrançois Tigeot 		}
562*3f2dd94aSFrançois Tigeot 
563*3f2dd94aSFrançois Tigeot 		/*
564*3f2dd94aSFrançois Tigeot 		 * XXX: Just keep the lower 21 bits for now since I'm not
565*3f2dd94aSFrançois Tigeot 		 * entirely sure if the HW touches any of the higher bits in
566*3f2dd94aSFrançois Tigeot 		 * this field
567*3f2dd94aSFrançois Tigeot 		 */
568*3f2dd94aSFrançois Tigeot 		ctx_id = report32[2] & 0x1fffff;
569*3f2dd94aSFrançois Tigeot 
570*3f2dd94aSFrançois Tigeot 		/*
571*3f2dd94aSFrançois Tigeot 		 * Squash whatever is in the CTX_ID field if it's marked as
572*3f2dd94aSFrançois Tigeot 		 * invalid to be sure we avoid false-positive, single-context
573*3f2dd94aSFrançois Tigeot 		 * filtering below...
574*3f2dd94aSFrançois Tigeot 		 *
575*3f2dd94aSFrançois Tigeot 		 * Note: that we don't clear the valid_ctx_bit so userspace can
576*3f2dd94aSFrançois Tigeot 		 * understand that the ID has been squashed by the kernel.
577*3f2dd94aSFrançois Tigeot 		 */
578*3f2dd94aSFrançois Tigeot 		if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
579*3f2dd94aSFrançois Tigeot 			ctx_id = report32[2] = INVALID_CTX_ID;
580*3f2dd94aSFrançois Tigeot 
581*3f2dd94aSFrançois Tigeot 		/*
582*3f2dd94aSFrançois Tigeot 		 * NB: For Gen 8 the OA unit no longer supports clock gating
583*3f2dd94aSFrançois Tigeot 		 * off for a specific context and the kernel can't securely
584*3f2dd94aSFrançois Tigeot 		 * stop the counters from updating as system-wide / global
585*3f2dd94aSFrançois Tigeot 		 * values.
586*3f2dd94aSFrançois Tigeot 		 *
587*3f2dd94aSFrançois Tigeot 		 * Automatic reports now include a context ID so reports can be
588*3f2dd94aSFrançois Tigeot 		 * filtered on the cpu but it's not worth trying to
589*3f2dd94aSFrançois Tigeot 		 * automatically subtract/hide counter progress for other
590*3f2dd94aSFrançois Tigeot 		 * contexts while filtering since we can't stop userspace
591*3f2dd94aSFrançois Tigeot 		 * issuing MI_REPORT_PERF_COUNT commands which would still
592*3f2dd94aSFrançois Tigeot 		 * provide a side-band view of the real values.
593*3f2dd94aSFrançois Tigeot 		 *
594*3f2dd94aSFrançois Tigeot 		 * To allow userspace (such as Mesa/GL_INTEL_performance_query)
595*3f2dd94aSFrançois Tigeot 		 * to normalize counters for a single filtered context then it
596*3f2dd94aSFrançois Tigeot 		 * needs be forwarded bookend context-switch reports so that it
597*3f2dd94aSFrançois Tigeot 		 * can track switches in between MI_REPORT_PERF_COUNT commands
598*3f2dd94aSFrançois Tigeot 		 * and can itself subtract/ignore the progress of counters
599*3f2dd94aSFrançois Tigeot 		 * associated with other contexts. Note that the hardware
600*3f2dd94aSFrançois Tigeot 		 * automatically triggers reports when switching to a new
601*3f2dd94aSFrançois Tigeot 		 * context which are tagged with the ID of the newly active
602*3f2dd94aSFrançois Tigeot 		 * context. To avoid the complexity (and likely fragility) of
603*3f2dd94aSFrançois Tigeot 		 * reading ahead while parsing reports to try and minimize
604*3f2dd94aSFrançois Tigeot 		 * forwarding redundant context switch reports (i.e. between
605*3f2dd94aSFrançois Tigeot 		 * other, unrelated contexts) we simply elect to forward them
606*3f2dd94aSFrançois Tigeot 		 * all.
607*3f2dd94aSFrançois Tigeot 		 *
608*3f2dd94aSFrançois Tigeot 		 * We don't rely solely on the reason field to identify context
609*3f2dd94aSFrançois Tigeot 		 * switches since it's not-uncommon for periodic samples to
610*3f2dd94aSFrançois Tigeot 		 * identify a switch before any 'context switch' report.
611*3f2dd94aSFrançois Tigeot 		 */
612*3f2dd94aSFrançois Tigeot 		if (!dev_priv->perf.oa.exclusive_stream->ctx ||
613*3f2dd94aSFrançois Tigeot 		    dev_priv->perf.oa.specific_ctx_id == ctx_id ||
614*3f2dd94aSFrançois Tigeot 		    (dev_priv->perf.oa.oa_buffer.last_ctx_id ==
615*3f2dd94aSFrançois Tigeot 		     dev_priv->perf.oa.specific_ctx_id) ||
616*3f2dd94aSFrançois Tigeot 		    reason & OAREPORT_REASON_CTX_SWITCH) {
617*3f2dd94aSFrançois Tigeot 
618*3f2dd94aSFrançois Tigeot 			/*
619*3f2dd94aSFrançois Tigeot 			 * While filtering for a single context we avoid
620*3f2dd94aSFrançois Tigeot 			 * leaking the IDs of other contexts.
621*3f2dd94aSFrançois Tigeot 			 */
622*3f2dd94aSFrançois Tigeot 			if (dev_priv->perf.oa.exclusive_stream->ctx &&
623*3f2dd94aSFrançois Tigeot 			    dev_priv->perf.oa.specific_ctx_id != ctx_id) {
624*3f2dd94aSFrançois Tigeot 				report32[2] = INVALID_CTX_ID;
625*3f2dd94aSFrançois Tigeot 			}
626*3f2dd94aSFrançois Tigeot 
627*3f2dd94aSFrançois Tigeot 			ret = append_oa_sample(stream, buf, count, offset,
628*3f2dd94aSFrançois Tigeot 					       report);
629*3f2dd94aSFrançois Tigeot 			if (ret)
630*3f2dd94aSFrançois Tigeot 				break;
631*3f2dd94aSFrançois Tigeot 
632*3f2dd94aSFrançois Tigeot 			dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
633*3f2dd94aSFrançois Tigeot 		}
634*3f2dd94aSFrançois Tigeot 
635*3f2dd94aSFrançois Tigeot 		/*
636*3f2dd94aSFrançois Tigeot 		 * The above reason field sanity check is based on
637*3f2dd94aSFrançois Tigeot 		 * the assumption that the OA buffer is initially
638*3f2dd94aSFrançois Tigeot 		 * zeroed and we reset the field after copying so the
639*3f2dd94aSFrançois Tigeot 		 * check is still meaningful once old reports start
640*3f2dd94aSFrançois Tigeot 		 * being overwritten.
641*3f2dd94aSFrançois Tigeot 		 */
642*3f2dd94aSFrançois Tigeot 		report32[0] = 0;
643*3f2dd94aSFrançois Tigeot 	}
644*3f2dd94aSFrançois Tigeot 
645*3f2dd94aSFrançois Tigeot 	if (start_offset != *offset) {
646*3f2dd94aSFrançois Tigeot 		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
647*3f2dd94aSFrançois Tigeot 
648*3f2dd94aSFrançois Tigeot 		/*
649*3f2dd94aSFrançois Tigeot 		 * We removed the gtt_offset for the copy loop above, indexing
650*3f2dd94aSFrançois Tigeot 		 * relative to oa_buf_base so put back here...
651*3f2dd94aSFrançois Tigeot 		 */
652*3f2dd94aSFrançois Tigeot 		head += gtt_offset;
653*3f2dd94aSFrançois Tigeot 
654*3f2dd94aSFrançois Tigeot 		I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
655*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.oa_buffer.head = head;
656*3f2dd94aSFrançois Tigeot 
657*3f2dd94aSFrançois Tigeot 		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
658*3f2dd94aSFrançois Tigeot 	}
659*3f2dd94aSFrançois Tigeot 
660*3f2dd94aSFrançois Tigeot 	return ret;
661*3f2dd94aSFrançois Tigeot }
662*3f2dd94aSFrançois Tigeot 
663*3f2dd94aSFrançois Tigeot /**
664*3f2dd94aSFrançois Tigeot  * gen8_oa_read - copy status records then buffered OA reports
665*3f2dd94aSFrançois Tigeot  * @stream: An i915-perf stream opened for OA metrics
666*3f2dd94aSFrançois Tigeot  * @buf: destination buffer given by userspace
667*3f2dd94aSFrançois Tigeot  * @count: the number of bytes userspace wants to read
668*3f2dd94aSFrançois Tigeot  * @offset: (inout): the current position for writing into @buf
669*3f2dd94aSFrançois Tigeot  *
670*3f2dd94aSFrançois Tigeot  * Checks OA unit status registers and if necessary appends corresponding
671*3f2dd94aSFrançois Tigeot  * status records for userspace (such as for a buffer full condition) and then
672*3f2dd94aSFrançois Tigeot  * initiate appending any buffered OA reports.
673*3f2dd94aSFrançois Tigeot  *
674*3f2dd94aSFrançois Tigeot  * Updates @offset according to the number of bytes successfully copied into
675*3f2dd94aSFrançois Tigeot  * the userspace buffer.
676*3f2dd94aSFrançois Tigeot  *
677*3f2dd94aSFrançois Tigeot  * NB: some data may be successfully copied to the userspace buffer
678*3f2dd94aSFrançois Tigeot  * even if an error is returned, and this is reflected in the
679*3f2dd94aSFrançois Tigeot  * updated @offset.
680*3f2dd94aSFrançois Tigeot  *
681*3f2dd94aSFrançois Tigeot  * Returns: zero on success or a negative error code
682*3f2dd94aSFrançois Tigeot  */
683*3f2dd94aSFrançois Tigeot static int gen8_oa_read(struct i915_perf_stream *stream,
684*3f2dd94aSFrançois Tigeot 			char __user *buf,
685*3f2dd94aSFrançois Tigeot 			size_t count,
686*3f2dd94aSFrançois Tigeot 			size_t *offset)
687*3f2dd94aSFrançois Tigeot {
688*3f2dd94aSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
689*3f2dd94aSFrançois Tigeot 	u32 oastatus;
690*3f2dd94aSFrançois Tigeot 	int ret;
691*3f2dd94aSFrançois Tigeot 
692*3f2dd94aSFrançois Tigeot 	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
693*3f2dd94aSFrançois Tigeot 		return -EIO;
694*3f2dd94aSFrançois Tigeot 
695*3f2dd94aSFrançois Tigeot 	oastatus = I915_READ(GEN8_OASTATUS);
696*3f2dd94aSFrançois Tigeot 
697*3f2dd94aSFrançois Tigeot 	/*
698*3f2dd94aSFrançois Tigeot 	 * We treat OABUFFER_OVERFLOW as a significant error:
699*3f2dd94aSFrançois Tigeot 	 *
700*3f2dd94aSFrançois Tigeot 	 * Although theoretically we could handle this more gracefully
701*3f2dd94aSFrançois Tigeot 	 * sometimes, some Gens don't correctly suppress certain
702*3f2dd94aSFrançois Tigeot 	 * automatically triggered reports in this condition and so we
703*3f2dd94aSFrançois Tigeot 	 * have to assume that old reports are now being trampled
704*3f2dd94aSFrançois Tigeot 	 * over.
705*3f2dd94aSFrançois Tigeot 	 *
706*3f2dd94aSFrançois Tigeot 	 * Considering how we don't currently give userspace control
707*3f2dd94aSFrançois Tigeot 	 * over the OA buffer size and always configure a large 16MB
708*3f2dd94aSFrançois Tigeot 	 * buffer, then a buffer overflow does anyway likely indicate
709*3f2dd94aSFrançois Tigeot 	 * that something has gone quite badly wrong.
710*3f2dd94aSFrançois Tigeot 	 */
711*3f2dd94aSFrançois Tigeot 	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
712*3f2dd94aSFrançois Tigeot 		ret = append_oa_status(stream, buf, count, offset,
713*3f2dd94aSFrançois Tigeot 				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
714*3f2dd94aSFrançois Tigeot 		if (ret)
715*3f2dd94aSFrançois Tigeot 			return ret;
716*3f2dd94aSFrançois Tigeot 
717*3f2dd94aSFrançois Tigeot 		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
718*3f2dd94aSFrançois Tigeot 			  dev_priv->perf.oa.period_exponent);
719*3f2dd94aSFrançois Tigeot 
720*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_disable(dev_priv);
721*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_enable(dev_priv);
722*3f2dd94aSFrançois Tigeot 
723*3f2dd94aSFrançois Tigeot 		/*
724*3f2dd94aSFrançois Tigeot 		 * Note: .oa_enable() is expected to re-init the oabuffer and
725*3f2dd94aSFrançois Tigeot 		 * reset GEN8_OASTATUS for us
726*3f2dd94aSFrançois Tigeot 		 */
727*3f2dd94aSFrançois Tigeot 		oastatus = I915_READ(GEN8_OASTATUS);
728*3f2dd94aSFrançois Tigeot 	}
729*3f2dd94aSFrançois Tigeot 
730*3f2dd94aSFrançois Tigeot 	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
731*3f2dd94aSFrançois Tigeot 		ret = append_oa_status(stream, buf, count, offset,
732*3f2dd94aSFrançois Tigeot 				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
733*3f2dd94aSFrançois Tigeot 		if (ret)
734*3f2dd94aSFrançois Tigeot 			return ret;
735*3f2dd94aSFrançois Tigeot 		I915_WRITE(GEN8_OASTATUS,
736*3f2dd94aSFrançois Tigeot 			   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
737*3f2dd94aSFrançois Tigeot 	}
738*3f2dd94aSFrançois Tigeot 
739*3f2dd94aSFrançois Tigeot 	return gen8_append_oa_reports(stream, buf, count, offset);
740*3f2dd94aSFrançois Tigeot }
741*3f2dd94aSFrançois Tigeot 
742*3f2dd94aSFrançois Tigeot /**
743*3f2dd94aSFrançois Tigeot  * Copies all buffered OA reports into userspace read() buffer.
744*3f2dd94aSFrançois Tigeot  * @stream: An i915-perf stream opened for OA metrics
745*3f2dd94aSFrançois Tigeot  * @buf: destination buffer given by userspace
746*3f2dd94aSFrançois Tigeot  * @count: the number of bytes userspace wants to read
747*3f2dd94aSFrançois Tigeot  * @offset: (inout): the current position for writing into @buf
748a85cb24fSFrançois Tigeot  * @head_ptr: (inout): the current oa buffer cpu read position
749a85cb24fSFrançois Tigeot  * @tail: the current oa buffer gpu write position
750a85cb24fSFrançois Tigeot  *
751a85cb24fSFrançois Tigeot  * Notably any error condition resulting in a short read (-%ENOSPC or
752a85cb24fSFrançois Tigeot  * -%EFAULT) will be returned even though one or more records may
753a85cb24fSFrançois Tigeot  * have been successfully copied. In this case it's up to the caller
754a85cb24fSFrançois Tigeot  * to decide if the error should be squashed before returning to
755a85cb24fSFrançois Tigeot  * userspace.
756a85cb24fSFrançois Tigeot  *
757a85cb24fSFrançois Tigeot  * Note: reports are consumed from the head, and appended to the
758a85cb24fSFrançois Tigeot  * tail, so the head chases the tail?... If you think that's mad
759a85cb24fSFrançois Tigeot  * and back-to-front you're not alone, but this follows the
760a85cb24fSFrançois Tigeot  * Gen PRM naming convention.
761a85cb24fSFrançois Tigeot  *
762a85cb24fSFrançois Tigeot  * Returns: 0 on success, negative error code on failure.
763a85cb24fSFrançois Tigeot  */
764a85cb24fSFrançois Tigeot static int gen7_append_oa_reports(struct i915_perf_stream *stream,
765a85cb24fSFrançois Tigeot 				  char __user *buf,
766a85cb24fSFrançois Tigeot 				  size_t count,
767a85cb24fSFrançois Tigeot 				  size_t *offset,
768a85cb24fSFrançois Tigeot 				  u32 *head_ptr,
769a85cb24fSFrançois Tigeot 				  u32 tail)
770a85cb24fSFrançois Tigeot {
771a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
772a85cb24fSFrançois Tigeot 	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
773a85cb24fSFrançois Tigeot 	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
774a85cb24fSFrançois Tigeot 	int tail_margin = dev_priv->perf.oa.tail_margin;
775a85cb24fSFrançois Tigeot 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
776a85cb24fSFrançois Tigeot 	u32 mask = (OA_BUFFER_SIZE - 1);
777a85cb24fSFrançois Tigeot 	u32 head;
778a85cb24fSFrançois Tigeot 	u32 taken;
779a85cb24fSFrançois Tigeot 	int ret = 0;
780a85cb24fSFrançois Tigeot 
781a85cb24fSFrançois Tigeot 	if (WARN_ON(!stream->enabled))
782a85cb24fSFrançois Tigeot 		return -EIO;
783a85cb24fSFrançois Tigeot 
784a85cb24fSFrançois Tigeot 	head = *head_ptr - gtt_offset;
785a85cb24fSFrançois Tigeot 	tail -= gtt_offset;
786a85cb24fSFrançois Tigeot 
787a85cb24fSFrançois Tigeot 	/* The OA unit is expected to wrap the tail pointer according to the OA
788a85cb24fSFrançois Tigeot 	 * buffer size and since we should never write a misaligned head
789a85cb24fSFrançois Tigeot 	 * pointer we don't expect to read one back either...
790a85cb24fSFrançois Tigeot 	 */
791a85cb24fSFrançois Tigeot 	if (tail > OA_BUFFER_SIZE || head > OA_BUFFER_SIZE ||
792a85cb24fSFrançois Tigeot 	    head % report_size) {
793a85cb24fSFrançois Tigeot 		DRM_ERROR("Inconsistent OA buffer pointer (head = %u, tail = %u): force restart\n",
794a85cb24fSFrançois Tigeot 			  head, tail);
795a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_disable(dev_priv);
796a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_enable(dev_priv);
797a85cb24fSFrançois Tigeot 		*head_ptr = I915_READ(GEN7_OASTATUS2) &
798a85cb24fSFrançois Tigeot 			GEN7_OASTATUS2_HEAD_MASK;
799a85cb24fSFrançois Tigeot 		return -EIO;
800a85cb24fSFrançois Tigeot 	}
801a85cb24fSFrançois Tigeot 
802a85cb24fSFrançois Tigeot 
803a85cb24fSFrançois Tigeot 	/* The tail pointer increases in 64 byte increments, not in report_size
804a85cb24fSFrançois Tigeot 	 * steps...
805a85cb24fSFrançois Tigeot 	 */
806a85cb24fSFrançois Tigeot 	tail &= ~(report_size - 1);
807a85cb24fSFrançois Tigeot 
808a85cb24fSFrançois Tigeot 	/* Move the tail pointer back by the current tail_margin to account for
809a85cb24fSFrançois Tigeot 	 * the possibility that the latest reports may not have really landed
810a85cb24fSFrançois Tigeot 	 * in memory yet...
811a85cb24fSFrançois Tigeot 	 */
812a85cb24fSFrançois Tigeot 
813a85cb24fSFrançois Tigeot 	if (OA_TAKEN(tail, head) < report_size + tail_margin)
814a85cb24fSFrançois Tigeot 		return -EAGAIN;
815a85cb24fSFrançois Tigeot 
816a85cb24fSFrançois Tigeot 	tail -= tail_margin;
817a85cb24fSFrançois Tigeot 	tail &= mask;
818a85cb24fSFrançois Tigeot 
819a85cb24fSFrançois Tigeot 	for (/* none */;
820a85cb24fSFrançois Tigeot 	     (taken = OA_TAKEN(tail, head));
821a85cb24fSFrançois Tigeot 	     head = (head + report_size) & mask) {
822a85cb24fSFrançois Tigeot 		u8 *report = oa_buf_base + head;
823a85cb24fSFrançois Tigeot 		u32 *report32 = (void *)report;
824a85cb24fSFrançois Tigeot 
825a85cb24fSFrançois Tigeot 		/* All the report sizes factor neatly into the buffer
826a85cb24fSFrançois Tigeot 		 * size so we never expect to see a report split
827a85cb24fSFrançois Tigeot 		 * between the beginning and end of the buffer.
828a85cb24fSFrançois Tigeot 		 *
829a85cb24fSFrançois Tigeot 		 * Given the initial alignment check a misalignment
830a85cb24fSFrançois Tigeot 		 * here would imply a driver bug that would result
831a85cb24fSFrançois Tigeot 		 * in an overrun.
832a85cb24fSFrançois Tigeot 		 */
833a85cb24fSFrançois Tigeot 		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
834a85cb24fSFrançois Tigeot 			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
835a85cb24fSFrançois Tigeot 			break;
836a85cb24fSFrançois Tigeot 		}
837a85cb24fSFrançois Tigeot 
838a85cb24fSFrançois Tigeot 		/* The report-ID field for periodic samples includes
839a85cb24fSFrançois Tigeot 		 * some undocumented flags related to what triggered
840a85cb24fSFrançois Tigeot 		 * the report and is never expected to be zero so we
841a85cb24fSFrançois Tigeot 		 * can check that the report isn't invalid before
842a85cb24fSFrançois Tigeot 		 * copying it to userspace...
843a85cb24fSFrançois Tigeot 		 */
844a85cb24fSFrançois Tigeot 		if (report32[0] == 0) {
845a85cb24fSFrançois Tigeot 			DRM_NOTE("Skipping spurious, invalid OA report\n");
846a85cb24fSFrançois Tigeot 			continue;
847a85cb24fSFrançois Tigeot 		}
848a85cb24fSFrançois Tigeot 
849a85cb24fSFrançois Tigeot 		ret = append_oa_sample(stream, buf, count, offset, report);
850a85cb24fSFrançois Tigeot 		if (ret)
851a85cb24fSFrançois Tigeot 			break;
852a85cb24fSFrançois Tigeot 
853a85cb24fSFrançois Tigeot 		/* The above report-id field sanity check is based on
854a85cb24fSFrançois Tigeot 		 * the assumption that the OA buffer is initially
855a85cb24fSFrançois Tigeot 		 * zeroed and we reset the field after copying so the
856a85cb24fSFrançois Tigeot 		 * check is still meaningful once old reports start
857a85cb24fSFrançois Tigeot 		 * being overwritten.
858a85cb24fSFrançois Tigeot 		 */
859a85cb24fSFrançois Tigeot 		report32[0] = 0;
860a85cb24fSFrançois Tigeot 	}
861a85cb24fSFrançois Tigeot 
862a85cb24fSFrançois Tigeot 	*head_ptr = gtt_offset + head;
863a85cb24fSFrançois Tigeot 
864a85cb24fSFrançois Tigeot 	return ret;
865a85cb24fSFrançois Tigeot }
866a85cb24fSFrançois Tigeot 
867a85cb24fSFrançois Tigeot /**
868a85cb24fSFrançois Tigeot  * gen7_oa_read - copy status records then buffered OA reports
869a85cb24fSFrançois Tigeot  * @stream: An i915-perf stream opened for OA metrics
870a85cb24fSFrançois Tigeot  * @buf: destination buffer given by userspace
871a85cb24fSFrançois Tigeot  * @count: the number of bytes userspace wants to read
872a85cb24fSFrançois Tigeot  * @offset: (inout): the current position for writing into @buf
873a85cb24fSFrançois Tigeot  *
874a85cb24fSFrançois Tigeot  * Checks Gen 7 specific OA unit status registers and if necessary appends
875a85cb24fSFrançois Tigeot  * corresponding status records for userspace (such as for a buffer full
876a85cb24fSFrançois Tigeot  * condition) and then initiate appending any buffered OA reports.
877a85cb24fSFrançois Tigeot  *
878a85cb24fSFrançois Tigeot  * Updates @offset according to the number of bytes successfully copied into
879a85cb24fSFrançois Tigeot  * the userspace buffer.
880a85cb24fSFrançois Tigeot  *
881a85cb24fSFrançois Tigeot  * Returns: zero on success or a negative error code
882a85cb24fSFrançois Tigeot  */
883a85cb24fSFrançois Tigeot static int gen7_oa_read(struct i915_perf_stream *stream,
884a85cb24fSFrançois Tigeot 			char __user *buf,
885a85cb24fSFrançois Tigeot 			size_t count,
886a85cb24fSFrançois Tigeot 			size_t *offset)
887a85cb24fSFrançois Tigeot {
888a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
889a85cb24fSFrançois Tigeot 	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
890a85cb24fSFrançois Tigeot 	u32 oastatus2;
891a85cb24fSFrançois Tigeot 	u32 oastatus1;
892a85cb24fSFrançois Tigeot 	u32 head;
893a85cb24fSFrançois Tigeot 	u32 tail;
894a85cb24fSFrançois Tigeot 	int ret;
895a85cb24fSFrançois Tigeot 
896a85cb24fSFrançois Tigeot 	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
897a85cb24fSFrançois Tigeot 		return -EIO;
898a85cb24fSFrançois Tigeot 
899a85cb24fSFrançois Tigeot 	oastatus2 = I915_READ(GEN7_OASTATUS2);
900a85cb24fSFrançois Tigeot 	oastatus1 = I915_READ(GEN7_OASTATUS1);
901a85cb24fSFrançois Tigeot 
902a85cb24fSFrançois Tigeot 	head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
903a85cb24fSFrançois Tigeot 	tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
904a85cb24fSFrançois Tigeot 
905a85cb24fSFrançois Tigeot 	/* XXX: On Haswell we don't have a safe way to clear oastatus1
906a85cb24fSFrançois Tigeot 	 * bits while the OA unit is enabled (while the tail pointer
907a85cb24fSFrançois Tigeot 	 * may be updated asynchronously) so we ignore status bits
908a85cb24fSFrançois Tigeot 	 * that have already been reported to userspace.
909a85cb24fSFrançois Tigeot 	 */
910a85cb24fSFrançois Tigeot 	oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;
911a85cb24fSFrançois Tigeot 
912a85cb24fSFrançois Tigeot 	/* We treat OABUFFER_OVERFLOW as a significant error:
913a85cb24fSFrançois Tigeot 	 *
914a85cb24fSFrançois Tigeot 	 * - The status can be interpreted to mean that the buffer is
915a85cb24fSFrançois Tigeot 	 *   currently full (with a higher precedence than OA_TAKEN()
916a85cb24fSFrançois Tigeot 	 *   which will start to report a near-empty buffer after an
917a85cb24fSFrançois Tigeot 	 *   overflow) but it's awkward that we can't clear the status
918a85cb24fSFrançois Tigeot 	 *   on Haswell, so without a reset we won't be able to catch
919a85cb24fSFrançois Tigeot 	 *   the state again.
920a85cb24fSFrançois Tigeot 	 *
921a85cb24fSFrançois Tigeot 	 * - Since it also implies the HW has started overwriting old
922a85cb24fSFrançois Tigeot 	 *   reports it may also affect our sanity checks for invalid
923a85cb24fSFrançois Tigeot 	 *   reports when copying to userspace that assume new reports
924a85cb24fSFrançois Tigeot 	 *   are being written to cleared memory.
925a85cb24fSFrançois Tigeot 	 *
926a85cb24fSFrançois Tigeot 	 * - In the future we may want to introduce a flight recorder
927a85cb24fSFrançois Tigeot 	 *   mode where the driver will automatically maintain a safe
928a85cb24fSFrançois Tigeot 	 *   guard band between head/tail, avoiding this overflow
929a85cb24fSFrançois Tigeot 	 *   condition, but we avoid the added driver complexity for
930a85cb24fSFrançois Tigeot 	 *   now.
931a85cb24fSFrançois Tigeot 	 */
932a85cb24fSFrançois Tigeot 	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
933a85cb24fSFrançois Tigeot 		ret = append_oa_status(stream, buf, count, offset,
934a85cb24fSFrançois Tigeot 				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
935a85cb24fSFrançois Tigeot 		if (ret)
936a85cb24fSFrançois Tigeot 			return ret;
937a85cb24fSFrançois Tigeot 
938*3f2dd94aSFrançois Tigeot 		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
939*3f2dd94aSFrançois Tigeot 			  dev_priv->perf.oa.period_exponent);
940a85cb24fSFrançois Tigeot 
941a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_disable(dev_priv);
942a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_enable(dev_priv);
943a85cb24fSFrançois Tigeot 
944a85cb24fSFrançois Tigeot 		oastatus2 = I915_READ(GEN7_OASTATUS2);
945a85cb24fSFrançois Tigeot 		oastatus1 = I915_READ(GEN7_OASTATUS1);
946a85cb24fSFrançois Tigeot 
947a85cb24fSFrançois Tigeot 		head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
948a85cb24fSFrançois Tigeot 		tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
949a85cb24fSFrançois Tigeot 	}
950a85cb24fSFrançois Tigeot 
951a85cb24fSFrançois Tigeot 	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
952a85cb24fSFrançois Tigeot 		ret = append_oa_status(stream, buf, count, offset,
953a85cb24fSFrançois Tigeot 				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
954a85cb24fSFrançois Tigeot 		if (ret)
955a85cb24fSFrançois Tigeot 			return ret;
956a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.gen7_latched_oastatus1 |=
957a85cb24fSFrançois Tigeot 			GEN7_OASTATUS1_REPORT_LOST;
958a85cb24fSFrançois Tigeot 	}
959a85cb24fSFrançois Tigeot 
960a85cb24fSFrançois Tigeot 	ret = gen7_append_oa_reports(stream, buf, count, offset,
961a85cb24fSFrançois Tigeot 				     &head, tail);
962a85cb24fSFrançois Tigeot 
963a85cb24fSFrançois Tigeot 	/* All the report sizes are a power of two and the
964a85cb24fSFrançois Tigeot 	 * head should always be incremented by some multiple
965a85cb24fSFrançois Tigeot 	 * of the report size.
966a85cb24fSFrançois Tigeot 	 *
967a85cb24fSFrançois Tigeot 	 * A warning here, but notably if we later read back a
968a85cb24fSFrançois Tigeot 	 * misaligned pointer we will treat that as a bug since
969a85cb24fSFrançois Tigeot 	 * it could lead to a buffer overrun.
970a85cb24fSFrançois Tigeot 	 */
971a85cb24fSFrançois Tigeot 	WARN_ONCE(head & (report_size - 1),
972a85cb24fSFrançois Tigeot 		  "i915: Writing misaligned OA head pointer");
973a85cb24fSFrançois Tigeot 
974a85cb24fSFrançois Tigeot 	/* Note: we update the head pointer here even if an error
975a85cb24fSFrançois Tigeot 	 * was returned since the error may represent a short read
976a85cb24fSFrançois Tigeot 	 * where some reports were successfully copied.
977a85cb24fSFrançois Tigeot 	 */
978a85cb24fSFrançois Tigeot 	I915_WRITE(GEN7_OASTATUS2,
979a85cb24fSFrançois Tigeot 		   ((head & GEN7_OASTATUS2_HEAD_MASK) |
980a85cb24fSFrançois Tigeot 		    OA_MEM_SELECT_GGTT));
981a85cb24fSFrançois Tigeot 
982a85cb24fSFrançois Tigeot 	return ret;
983a85cb24fSFrançois Tigeot }
984a85cb24fSFrançois Tigeot 
985a85cb24fSFrançois Tigeot /**
986a85cb24fSFrançois Tigeot  * i915_oa_wait_unlocked - handles blocking IO until OA data available
987a85cb24fSFrançois Tigeot  * @stream: An i915-perf stream opened for OA metrics
988a85cb24fSFrançois Tigeot  *
989a85cb24fSFrançois Tigeot  * Called when userspace tries to read() from a blocking stream FD opened
990a85cb24fSFrançois Tigeot  * for OA metrics. It waits until the hrtimer callback finds a non-empty
991a85cb24fSFrançois Tigeot  * OA buffer and wakes us.
992a85cb24fSFrançois Tigeot  *
993a85cb24fSFrançois Tigeot  * Note: it's acceptable to have this return with some false positives
994a85cb24fSFrançois Tigeot  * since any subsequent read handling will return -EAGAIN if there isn't
995a85cb24fSFrançois Tigeot  * really data ready for userspace yet.
996a85cb24fSFrançois Tigeot  *
997a85cb24fSFrançois Tigeot  * Returns: zero on success or a negative error code
 * (-EIO when periodic sampling is not enabled, otherwise the result of
 * wait_event_interruptible() is passed through unchanged)
998a85cb24fSFrançois Tigeot  */
999a85cb24fSFrançois Tigeot static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
1000a85cb24fSFrançois Tigeot {
1001a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
1002a85cb24fSFrançois Tigeot 
1003a85cb24fSFrançois Tigeot 	/* We would wait indefinitely if periodic sampling is not enabled */
1004a85cb24fSFrančois Tigeot 	if (!dev_priv->perf.oa.periodic)
1005a85cb24fSFrançois Tigeot 		return -EIO;
1006a85cb24fSFrançois Tigeot 
1007a85cb24fSFrançois Tigeot 	/* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it
1008a85cb24fSFrançois Tigeot 	 * just performs mmio reads of the OA buffer head + tail pointers and
1009a85cb24fSFrançois Tigeot 	 * it's assumed we're handling some operation that implies the stream
1010a85cb24fSFrançois Tigeot 	 * can't be destroyed until completion (such as a read()) that ensures
1011a85cb24fSFrançois Tigeot 	 * the device + OA buffer can't disappear
1012a85cb24fSFrançois Tigeot 	 */
	/* Wait on the OA poll queue; the wake-up condition is the negation of
	 * the per-generation oa_buffer_is_empty() hook.
	 */
1013a85cb24fSFrançois Tigeot 	return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
1014a85cb24fSFrançois Tigeot 					!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv));
1015a85cb24fSFrançois Tigeot }
1016a85cb24fSFrançois Tigeot 
1017a85cb24fSFrançois Tigeot /**
1018a85cb24fSFrançois Tigeot  * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
1019a85cb24fSFrançois Tigeot  * @stream: An i915-perf stream opened for OA metrics
1020a85cb24fSFrançois Tigeot  * @file: An i915 perf stream file
1021a85cb24fSFrançois Tigeot  * @wait: poll() state table
1022a85cb24fSFrançois Tigeot  *
1023a85cb24fSFrançois Tigeot  * For handling userspace polling on an i915 perf stream opened for OA metrics,
1024a85cb24fSFrançois Tigeot  * this starts a poll_wait with the wait queue that our hrtimer callback wakes
1025a85cb24fSFrançois Tigeot  * when it sees data ready to read in the circular OA buffer.
 *
 * The only work done here is the poll_wait() call on
 * dev_priv->perf.oa.poll_wq; no event mask is computed or returned.
1026a85cb24fSFrançois Tigeot  */
1027a85cb24fSFrançois Tigeot static void i915_oa_poll_wait(struct i915_perf_stream *stream,
1028a85cb24fSFrançois Tigeot 			      struct file *file,
1029a85cb24fSFrançois Tigeot 			      poll_table *wait)
1030a85cb24fSFrançois Tigeot {
1031a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
1032a85cb24fSFrançois Tigeot 
1033a85cb24fSFrançois Tigeot 	poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
1034a85cb24fSFrançois Tigeot }
1035a85cb24fSFrançois Tigeot 
1036a85cb24fSFrançois Tigeot /**
1037a85cb24fSFrançois Tigeot  * i915_oa_read - just calls through to &i915_oa_ops->read
1038a85cb24fSFrançois Tigeot  * @stream: An i915-perf stream opened for OA metrics
1039a85cb24fSFrançois Tigeot  * @buf: destination buffer given by userspace
1040a85cb24fSFrançois Tigeot  * @count: the number of bytes userspace wants to read
1041a85cb24fSFrançois Tigeot  * @offset: (inout): the current position for writing into @buf
1042a85cb24fSFrançois Tigeot  *
1043a85cb24fSFrançois Tigeot  * Updates @offset according to the number of bytes successfully copied into
1044a85cb24fSFrançois Tigeot  * the userspace buffer.
 *
 * All arguments are forwarded untouched to the read hook installed in
 * dev_priv->perf.oa.ops, and that hook's return value is returned as-is.
1045a85cb24fSFrançois Tigeot  *
1046a85cb24fSFrançois Tigeot  * Returns: zero on success or a negative error code
1047a85cb24fSFrançois Tigeot  */
1048a85cb24fSFrançois Tigeot static int i915_oa_read(struct i915_perf_stream *stream,
1049a85cb24fSFrançois Tigeot 			char __user *buf,
1050a85cb24fSFrançois Tigeot 			size_t count,
1051a85cb24fSFrançois Tigeot 			size_t *offset)
1052a85cb24fSFrançois Tigeot {
1053a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
1054a85cb24fSFrançois Tigeot 
1055a85cb24fSFrançois Tigeot 	return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
1056a85cb24fSFrançois Tigeot }
1057a85cb24fSFrançois Tigeot 
1058a85cb24fSFrançois Tigeot /**
1059a85cb24fSFrançois Tigeot  * oa_get_render_ctx_id - determine and hold ctx hw id
1060a85cb24fSFrançois Tigeot  * @stream: An i915-perf stream opened for OA metrics
1061a85cb24fSFrançois Tigeot  *
1062a85cb24fSFrançois Tigeot  * Determine the render context hw id, and ensure it remains fixed for the
1063a85cb24fSFrançois Tigeot  * lifetime of the stream. This ensures that we don't have to worry about
1064a85cb24fSFrançois Tigeot  * updating the context ID in OACONTROL on the fly.
 *
 * The result is stored in dev_priv->perf.oa.specific_ctx_id. With execlists
 * enabled the ID is simply stream->ctx->hw_id and nothing is pinned or
 * locked. Otherwise the context is pinned on the RCS engine under
 * struct_mutex and the GGTT offset of its state object is used as the ID.
1065a85cb24fSFrançois Tigeot  *
1066a85cb24fSFrançois Tigeot  * Returns: zero on success or a negative error code
 * (the error from i915_mutex_lock_interruptible(), or PTR_ERR() of a failed
 * context pin)
1067a85cb24fSFrançois Tigeot  */
1068a85cb24fSFrançois Tigeot static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
1069a85cb24fSFrançois Tigeot {
1070a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
1071*3f2dd94aSFrançois Tigeot 
1072*3f2dd94aSFrançois Tigeot 	if (i915_modparams.enable_execlists)
1073*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
1074*3f2dd94aSFrançois Tigeot 	else {
1075a85cb24fSFrançois Tigeot 		struct intel_engine_cs *engine = dev_priv->engine[RCS];
1076*3f2dd94aSFrançois Tigeot 		struct intel_ring *ring;
1077a85cb24fSFrançois Tigeot 		int ret;
1078a85cb24fSFrançois Tigeot 
1079a85cb24fSFrançois Tigeot 		ret = i915_mutex_lock_interruptible(&dev_priv->drm);
1080a85cb24fSFrançois Tigeot 		if (ret)
1081a85cb24fSFrançois Tigeot 			return ret;
1082a85cb24fSFrançois Tigeot 
1083*3f2dd94aSFrançois Tigeot 		/*
1084*3f2dd94aSFrançois Tigeot 		 * As the ID is the gtt offset of the context's vma we
1085*3f2dd94aSFrançois Tigeot 		 * pin the vma to ensure the ID remains fixed.
1086a85cb24fSFrançois Tigeot 		 *
1087a85cb24fSFrançois Tigeot 		 * NB: implied RCS engine...
1088a85cb24fSFrançois Tigeot 		 */
1089*3f2dd94aSFrançois Tigeot 		ring = engine->context_pin(engine, stream->ctx);
		/*
		 * The mutex is dropped before the result is inspected, so the
		 * error return below never leaves struct_mutex held. @ring is
		 * only used for the IS_ERR() check.
		 */
1090*3f2dd94aSFrançois Tigeot 		mutex_unlock(&dev_priv->drm.struct_mutex);
1091*3f2dd94aSFrançois Tigeot 		if (IS_ERR(ring))
1092*3f2dd94aSFrançois Tigeot 			return PTR_ERR(ring);
1093a85cb24fSFrançois Tigeot 
1094*3f2dd94aSFrançois Tigeot 
1095*3f2dd94aSFrançois Tigeot 		/*
1096*3f2dd94aSFrançois Tigeot 		 * Explicitly track the ID (instead of calling
1097*3f2dd94aSFrançois Tigeot 		 * i915_ggtt_offset() on the fly) considering the difference
1098*3f2dd94aSFrançois Tigeot 		 * with gen8+ and execlists
1099a85cb24fSFrançois Tigeot 		 */
1100a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.specific_ctx_id =
1101a85cb24fSFrançois Tigeot 			i915_ggtt_offset(stream->ctx->engine[engine->id].state);
1102*3f2dd94aSFrançois Tigeot 	}
1103a85cb24fSFrançois Tigeot 
1104*3f2dd94aSFrançois Tigeot 	return 0;
1105a85cb24fSFrançois Tigeot }
1106a85cb24fSFrançois Tigeot 
1107a85cb24fSFrançois Tigeot /**
1108a85cb24fSFrançois Tigeot  * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
1109a85cb24fSFrançois Tigeot  * @stream: An i915-perf stream opened for OA metrics
1110a85cb24fSFrançois Tigeot  *
1111a85cb24fSFrançois Tigeot  * In case anything needed doing to ensure the context HW ID would remain valid
1112a85cb24fSFrançois Tigeot  * for the lifetime of the stream, then that can be undone here.
 *
 * Both paths reset dev_priv->perf.oa.specific_ctx_id to INVALID_CTX_ID. The
 * non-execlists path additionally unpins the context from the RCS engine,
 * doing both steps while holding struct_mutex.
1113a85cb24fSFrançois Tigeot  */
1114a85cb24fSFrançois Tigeot static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
1115a85cb24fSFrançois Tigeot {
1116a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
1117*3f2dd94aSFrançois Tigeot 
1118*3f2dd94aSFrançois Tigeot 	if (i915_modparams.enable_execlists) {
		/* Nothing was pinned for execlists; just invalidate the ID. */
1119*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
1120*3f2dd94aSFrançois Tigeot 	} else {
1121a85cb24fSFrançois Tigeot 		struct intel_engine_cs *engine = dev_priv->engine[RCS];
1122a85cb24fSFrançois Tigeot 
		/* Uninterruptible lock: this teardown path cannot fail. */
1123a85cb24fSFrançois Tigeot 		mutex_lock(&dev_priv->drm.struct_mutex);
1124a85cb24fSFrançois Tigeot 
1125a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
1126a85cb24fSFrançois Tigeot 		engine->context_unpin(engine, stream->ctx);
1127a85cb24fSFrançois Tigeot 
1128a85cb24fSFrançois Tigeot 		mutex_unlock(&dev_priv->drm.struct_mutex);
1129a85cb24fSFrançois Tigeot 	}
1130*3f2dd94aSFrançois Tigeot }
1131a85cb24fSFrançois Tigeot 
/*
 * free_oa_buffer - release the OA buffer held in i915->perf.oa.oa_buffer
 * @i915: device private
 *
 * Under struct_mutex: drops the CPU mapping pin, the vma pin and the object
 * reference, then clears the cached vma and vaddr pointers.
 *
 * The vma pointer is dereferenced unconditionally, so this presumably must
 * only be called while a buffer is allocated - TODO confirm against callers.
 */
1132a85cb24fSFrançois Tigeot static void
1133a85cb24fSFrançois Tigeot free_oa_buffer(struct drm_i915_private *i915)
1134a85cb24fSFrançois Tigeot {
1135a85cb24fSFrançois Tigeot 	mutex_lock(&i915->drm.struct_mutex);
1136a85cb24fSFrançois Tigeot 
1137a85cb24fSFrançois Tigeot 	i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj);
1138a85cb24fSFrançois Tigeot 	i915_vma_unpin(i915->perf.oa.oa_buffer.vma);
1139a85cb24fSFrançois Tigeot 	i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj);
1140a85cb24fSFrançois Tigeot 
	/* Forget the buffer so a later allocation starts from a clean state. */
1141a85cb24fSFrançois Tigeot 	i915->perf.oa.oa_buffer.vma = NULL;
1142a85cb24fSFrançois Tigeot 	i915->perf.oa.oa_buffer.vaddr = NULL;
1143a85cb24fSFrançois Tigeot 
1144a85cb24fSFrançois Tigeot 	mutex_unlock(&i915->drm.struct_mutex);
1145a85cb24fSFrançois Tigeot }
1146a85cb24fSFrançois Tigeot 
/*
 * i915_oa_stream_destroy - tear down the exclusive OA stream
 * @stream: the stream being destroyed; must be the current
 *          dev_priv->perf.oa.exclusive_stream (enforced with BUG_ON)
 *
 * Teardown order: clear exclusive_stream and disable the metric set under
 * struct_mutex, free the OA buffer, drop the forcewake and runtime-pm
 * references, and finally release the context ID hold when the stream was
 * opened for a specific context.
 */
1147a85cb24fSFrançois Tigeot static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
1148a85cb24fSFrançois Tigeot {
1149a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
1150a85cb24fSFrançois Tigeot 
1151a85cb24fSFrançois Tigeot 	BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);
1152a85cb24fSFrançois Tigeot 
1153*3f2dd94aSFrançois Tigeot 	/*
1154*3f2dd94aSFrançois Tigeot 	 * Unset exclusive_stream first, it will be checked while disabling
1155*3f2dd94aSFrançois Tigeot 	 * the metric set on gen8+.
	 *
	 * This is the only place the pointer is cleared: it is done with
	 * struct_mutex held, and no unlocked store follows later on.
1156*3f2dd94aSFrançois Tigeot 	 */
1157*3f2dd94aSFrançois Tigeot 	mutex_lock(&dev_priv->drm.struct_mutex);
1158*3f2dd94aSFrançois Tigeot 	dev_priv->perf.oa.exclusive_stream = NULL;
1159a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
1160*3f2dd94aSFrançois Tigeot 	mutex_unlock(&dev_priv->drm.struct_mutex);
1161a85cb24fSFrançois Tigeot 
1162a85cb24fSFrançois Tigeot 	free_oa_buffer(dev_priv);
1163a85cb24fSFrançois Tigeot 
1164a85cb24fSFrançois Tigeot 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
1165a85cb24fSFrançois Tigeot 	intel_runtime_pm_put(dev_priv);
1166a85cb24fSFrançois Tigeot 
	/* Only context-specific streams took a context ID hold. */
1167a85cb24fSFrançois Tigeot 	if (stream->ctx)
1168a85cb24fSFrançois Tigeot 		oa_put_render_ctx_id(stream);
1171a85cb24fSFrançois Tigeot }
1172a85cb24fSFrançois Tigeot 
/*
 * gen7_init_oa_buffer - (re)initialise the gen7 OA buffer registers and state
 * @dev_priv: device private; perf.oa.oa_buffer.vma and .vaddr must be set
 *
 * Points the head (OASTATUS2), base (OABUFFER) and tail (OASTATUS1) at the
 * GGTT offset of the OA buffer, clears the latched OASTATUS1 flags, zeroes
 * the whole buffer through its CPU mapping and resets the pollin flag.
 */
1173a85cb24fSFrançois Tigeot static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
1174a85cb24fSFrançois Tigeot {
1175a85cb24fSFrançois Tigeot 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
1176a85cb24fSFrançois Tigeot 
1177a85cb24fSFrançois Tigeot 	/* Pre-DevBDW: OABUFFER must be set with counters off,
1178a85cb24fSFrançois Tigeot 	 * before OASTATUS1, but after OASTATUS2
1179a85cb24fSFrançois Tigeot 	 */
1180a85cb24fSFrançois Tigeot 	I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */
1181a85cb24fSFrançois Tigeot 	I915_WRITE(GEN7_OABUFFER, gtt_offset);
1182a85cb24fSFrançois Tigeot 	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
1183a85cb24fSFrançois Tigeot 
1184a85cb24fSFrançois Tigeot 	/* On Haswell we have to track which OASTATUS1 flags we've
1185a85cb24fSFrançois Tigeot 	 * already seen since they can't be cleared while periodic
1186a85cb24fSFrançois Tigeot 	 * sampling is enabled.
1187a85cb24fSFrançois Tigeot 	 */
1188a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.gen7_latched_oastatus1 = 0;
1189a85cb24fSFrançois Tigeot 
1190a85cb24fSFrançois Tigeot 	/* NB: although the OA buffer will initially be allocated
1191a85cb24fSFrançois Tigeot 	 * zeroed via shmfs (and so this memset is redundant when
1192a85cb24fSFrançois Tigeot 	 * first allocating), we may re-init the OA buffer, either
1193a85cb24fSFrançois Tigeot 	 * when re-enabling a stream or in error/reset paths.
1194a85cb24fSFrançois Tigeot 	 *
1195a85cb24fSFrançois Tigeot 	 * The reason we clear the buffer for each re-init is for the
1196a85cb24fSFrançois Tigeot 	 * sanity check in gen7_append_oa_reports() that looks at the
1197a85cb24fSFrançois Tigeot 	 * report-id field to make sure it's non-zero which relies on
1198a85cb24fSFrançois Tigeot 	 * the assumption that new reports are being written to zeroed
1199a85cb24fSFrançois Tigeot 	 * memory...
1200a85cb24fSFrançois Tigeot 	 */
1201a85cb24fSFrançois Tigeot 	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
1202a85cb24fSFrançois Tigeot 
1203a85cb24fSFrançois Tigeot 	/* Maybe make ->pollin per-stream state if we support multiple
1204a85cb24fSFrançois Tigeot 	 * concurrent streams in the future.
1205a85cb24fSFrançois Tigeot 	 */
1206a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.pollin = false;
1207a85cb24fSFrançois Tigeot }
1208a85cb24fSFrançois Tigeot 
/*
 * gen8_init_oa_buffer - (re)initialise the gen8+ OA buffer registers and state
 * @dev_priv: device private; perf.oa.oa_buffer.vma and .vaddr must be set
 *
 * With oa_buffer.ptr_lock held (interrupts saved/restored): clears
 * OASTATUS, programs head, buffer base and tail to the buffer's GGTT
 * offset, and resets the software head, both cached tail pointers and the
 * last seen context ID. After dropping the lock it zeroes the buffer through
 * its CPU mapping and resets the pollin flag.
 */
1209*3f2dd94aSFrançois Tigeot static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
1210*3f2dd94aSFrançois Tigeot {
1211*3f2dd94aSFrançois Tigeot 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
1212*3f2dd94aSFrançois Tigeot 	unsigned long flags;
1213*3f2dd94aSFrançois Tigeot 
1214*3f2dd94aSFrançois Tigeot 	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
1215*3f2dd94aSFrançois Tigeot 
1216*3f2dd94aSFrançois Tigeot 	I915_WRITE(GEN8_OASTATUS, 0);
1217*3f2dd94aSFrançois Tigeot 	I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
	/* Keep the software copy of the head in step with the register. */
1218*3f2dd94aSFrançois Tigeot 	dev_priv->perf.oa.oa_buffer.head = gtt_offset;
1219*3f2dd94aSFrançois Tigeot 
1220*3f2dd94aSFrançois Tigeot 	I915_WRITE(GEN8_OABUFFER_UDW, 0);
1221*3f2dd94aSFrançois Tigeot 
1222*3f2dd94aSFrançois Tigeot 	/*
1223*3f2dd94aSFrançois Tigeot 	 * PRM says:
1224*3f2dd94aSFrançois Tigeot 	 *
1225*3f2dd94aSFrançois Tigeot 	 *  "This MMIO must be set before the OATAILPTR
1226*3f2dd94aSFrançois Tigeot 	 *  register and after the OAHEADPTR register. This is
1227*3f2dd94aSFrançois Tigeot 	 *  to enable proper functionality of the overflow
1228*3f2dd94aSFrançois Tigeot 	 *  bit."
1229*3f2dd94aSFrançois Tigeot 	 */
1230*3f2dd94aSFrançois Tigeot 	I915_WRITE(GEN8_OABUFFER, gtt_offset |
1231*3f2dd94aSFrançois Tigeot 		   OABUFFER_SIZE_16M | OA_MEM_SELECT_GGTT);
1232*3f2dd94aSFrançois Tigeot 	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
1233*3f2dd94aSFrançois Tigeot 
1234*3f2dd94aSFrançois Tigeot 	/* Mark that we need updated tail pointers to read from... */
1235*3f2dd94aSFrançois Tigeot 	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
1236*3f2dd94aSFrançois Tigeot 	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
1237*3f2dd94aSFrançois Tigeot 
1238*3f2dd94aSFrançois Tigeot 	/*
1239*3f2dd94aSFrançois Tigeot 	 * Reset state used to recognise context switches, affecting which
1240*3f2dd94aSFrançois Tigeot 	 * reports we will forward to userspace while filtering for a single
1241*3f2dd94aSFrançois Tigeot 	 * context.
1242*3f2dd94aSFrançois Tigeot 	 */
1243*3f2dd94aSFrançois Tigeot 	dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;
1244*3f2dd94aSFrançois Tigeot 
1245*3f2dd94aSFrançois Tigeot 	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
1246*3f2dd94aSFrançois Tigeot 
1247*3f2dd94aSFrançois Tigeot 	/*
1248*3f2dd94aSFrançois Tigeot 	 * NB: although the OA buffer will initially be allocated
1249*3f2dd94aSFrançois Tigeot 	 * zeroed via shmfs (and so this memset is redundant when
1250*3f2dd94aSFrançois Tigeot 	 * first allocating), we may re-init the OA buffer, either
1251*3f2dd94aSFrançois Tigeot 	 * when re-enabling a stream or in error/reset paths.
1252*3f2dd94aSFrançois Tigeot 	 *
1253*3f2dd94aSFrançois Tigeot 	 * The reason we clear the buffer for each re-init is for the
1254*3f2dd94aSFrançois Tigeot 	 * sanity check in gen8_append_oa_reports() that looks at the
1255*3f2dd94aSFrançois Tigeot 	 * reason field to make sure it's non-zero which relies on
1256*3f2dd94aSFrançois Tigeot 	 * the assumption that new reports are being written to zeroed
1257*3f2dd94aSFrançois Tigeot 	 * memory...
1258*3f2dd94aSFrançois Tigeot 	 */
1259*3f2dd94aSFrançois Tigeot 	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
1260*3f2dd94aSFrançois Tigeot 
1261*3f2dd94aSFrançois Tigeot 	/*
1262*3f2dd94aSFrançois Tigeot 	 * Maybe make ->pollin per-stream state if we support multiple
1263*3f2dd94aSFrançois Tigeot 	 * concurrent streams in the future.
1264*3f2dd94aSFrançois Tigeot 	 */
1265*3f2dd94aSFrançois Tigeot 	dev_priv->perf.oa.pollin = false;
1266*3f2dd94aSFrançois Tigeot }
1267*3f2dd94aSFrançois Tigeot 
/*
 * alloc_oa_buffer - allocate, pin, map and initialise the OA buffer
 * @dev_priv: device private
 *
 * Creates a GEM object of OA_BUFFER_SIZE bytes, sets its cache level to
 * I915_CACHE_LLC, pins it into the GGTT, maps it write-back for the CPU and
 * then runs the per-generation init_oa_buffer() hook. The vma and CPU
 * address are stored in dev_priv->perf.oa.oa_buffer. All of this happens
 * with struct_mutex held (taken interruptibly).
 *
 * Returns: 0 on success; -ENODEV if a buffer is already allocated; otherwise
 * the error from taking the mutex or from whichever step failed. On failure
 * the cached vma/vaddr are left NULL.
 */
1268a85cb24fSFrançois Tigeot static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
1269a85cb24fSFrançois Tigeot {
1270a85cb24fSFrançois Tigeot 	struct drm_i915_gem_object *bo;
1271a85cb24fSFrançois Tigeot 	struct i915_vma *vma;
1272a85cb24fSFrançois Tigeot 	int ret;
1273a85cb24fSFrançois Tigeot 
1274a85cb24fSFrançois Tigeot 	if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
1275a85cb24fSFrançois Tigeot 		return -ENODEV;
1276a85cb24fSFrançois Tigeot 
1277a85cb24fSFrançois Tigeot 	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
1278a85cb24fSFrançois Tigeot 	if (ret)
1279a85cb24fSFrançois Tigeot 		return ret;
1280a85cb24fSFrançois Tigeot 
	/* Compile-time checks on the buffer size constant. */
1281a85cb24fSFrançois Tigeot 	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
1282a85cb24fSFrançois Tigeot 	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
1283a85cb24fSFrançois Tigeot 
1284a85cb24fSFrançois Tigeot 	bo = i915_gem_object_create(dev_priv, OA_BUFFER_SIZE);
1285a85cb24fSFrançois Tigeot 	if (IS_ERR(bo)) {
1286a85cb24fSFrançois Tigeot 		DRM_ERROR("Failed to allocate OA buffer\n");
1287a85cb24fSFrançois Tigeot 		ret = PTR_ERR(bo);
1288a85cb24fSFrançois Tigeot 		goto unlock;
1289a85cb24fSFrançois Tigeot 	}
1290a85cb24fSFrançois Tigeot 
1291a85cb24fSFrançois Tigeot 	ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
1292a85cb24fSFrançois Tigeot 	if (ret)
1293a85cb24fSFrançois Tigeot 		goto err_unref;
1294a85cb24fSFrançois Tigeot 
1295a85cb24fSFrançois Tigeot 	/* PreHSW required 512K alignment, HSW requires 16M */
1296a85cb24fSFrançois Tigeot 	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
1297a85cb24fSFrançois Tigeot 	if (IS_ERR(vma)) {
1298a85cb24fSFrançois Tigeot 		ret = PTR_ERR(vma);
1299a85cb24fSFrançois Tigeot 		goto err_unref;
1300a85cb24fSFrançois Tigeot 	}
1301a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.oa_buffer.vma = vma;
1302a85cb24fSFrançois Tigeot 
1303a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.oa_buffer.vaddr =
1304a85cb24fSFrançois Tigeot 		i915_gem_object_pin_map(bo, I915_MAP_WB);
1305a85cb24fSFrançois Tigeot 	if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
1306a85cb24fSFrançois Tigeot 		ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
1307a85cb24fSFrançois Tigeot 		goto err_unpin;
1308a85cb24fSFrançois Tigeot 	}
1309a85cb24fSFrançois Tigeot 
1310a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.ops.init_oa_buffer(dev_priv);
1311a85cb24fSFrançois Tigeot 
1312a85cb24fSFrançois Tigeot 	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
1313a85cb24fSFrançois Tigeot 			 i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
1314a85cb24fSFrançois Tigeot 			 dev_priv->perf.oa.oa_buffer.vaddr);
1315a85cb24fSFrançois Tigeot 
	/* Success: ret is still 0 from the set_cache_level call above. */
1316a85cb24fSFrançois Tigeot 	goto unlock;
1317a85cb24fSFrançois Tigeot 
1318a85cb24fSFrançois Tigeot err_unpin:
1319a85cb24fSFrançois Tigeot 	__i915_vma_unpin(vma);
1320a85cb24fSFrançois Tigeot 
1321a85cb24fSFrançois Tigeot err_unref:
1322a85cb24fSFrançois Tigeot 	i915_gem_object_put(bo);
1323a85cb24fSFrançois Tigeot 
	/* vaddr may hold an ERR_PTR here; never leave stale pointers cached. */
1324a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.oa_buffer.vaddr = NULL;
1325a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.oa_buffer.vma = NULL;
1326a85cb24fSFrançois Tigeot 
1327a85cb24fSFrançois Tigeot unlock:
1328a85cb24fSFrançois Tigeot 	mutex_unlock(&dev_priv->drm.struct_mutex);
1329a85cb24fSFrançois Tigeot 	return ret;
1330a85cb24fSFrançois Tigeot }
1331a85cb24fSFrançois Tigeot 
/*
 * config_oa_regs - write a list of register/value pairs
 * @dev_priv: device private (used by the I915_WRITE() macro)
 * @regs: array of address/value pairs
 * @n_regs: number of entries in @regs
 *
 * Entries are written in array order, one I915_WRITE() per entry.
 */
1332a85cb24fSFrançois Tigeot static void config_oa_regs(struct drm_i915_private *dev_priv,
1333a85cb24fSFrançois Tigeot 			   const struct i915_oa_reg *regs,
1334*3f2dd94aSFrançois Tigeot 			   u32 n_regs)
1335a85cb24fSFrançois Tigeot {
1336*3f2dd94aSFrançois Tigeot 	u32 i;
1337a85cb24fSFrançois Tigeot 
1338a85cb24fSFrançois Tigeot 	for (i = 0; i < n_regs; i++) {
1339a85cb24fSFrançois Tigeot 		const struct i915_oa_reg *reg = regs + i;
1340a85cb24fSFrançois Tigeot 
1341a85cb24fSFrançois Tigeot 		I915_WRITE(reg->addr, reg->value);
1342a85cb24fSFrançois Tigeot 	}
1343a85cb24fSFrançois Tigeot }
1344a85cb24fSFrançois Tigeot 
/*
 * hsw_enable_metric_set - program the Haswell OA unit for a metric set
 * @dev_priv: device private
 * @oa_config: configuration supplying the mux and B-counter register lists
 *
 * Adjusts clock gating, writes the mux registers, sleeps 15-20ms to let the
 * mux configuration settle, then writes the B-counter registers.
 *
 * Returns: always 0.
 */
1345*3f2dd94aSFrançois Tigeot static int hsw_enable_metric_set(struct drm_i915_private *dev_priv,
1346*3f2dd94aSFrançois Tigeot 				 const struct i915_oa_config *oa_config)
1347a85cb24fSFrançois Tigeot {
1348a85cb24fSFrançois Tigeot 	/* PRM:
1349a85cb24fSFrançois Tigeot 	 *
1350a85cb24fSFrançois Tigeot 	 * OA unit is using “crclk” for its functionality. When trunk
1351a85cb24fSFrançois Tigeot 	 * level clock gating takes place, OA clock would be gated,
1352a85cb24fSFrançois Tigeot 	 * unable to count the events from non-render clock domain.
1353a85cb24fSFrançois Tigeot 	 * Render clock gating must be disabled when OA is enabled to
1354a85cb24fSFrançois Tigeot 	 * count the events from non-render domain. Unit level clock
1355a85cb24fSFrançois Tigeot 	 * gating for RCS should also be disabled.
1356a85cb24fSFrançois Tigeot 	 */
1357a85cb24fSFrançois Tigeot 	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1358a85cb24fSFrançois Tigeot 				    ~GEN7_DOP_CLOCK_GATE_ENABLE));
1359a85cb24fSFrançois Tigeot 	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
1360a85cb24fSFrançois Tigeot 				  GEN6_CSUNIT_CLOCK_GATE_DISABLE));
1361a85cb24fSFrançois Tigeot 
1362*3f2dd94aSFrançois Tigeot 	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
1363a85cb24fSFrançois Tigeot 
1364a85cb24fSFrançois Tigeot 	/* It apparently takes a fairly long time for a new MUX
1365a85cb24fSFrançois Tigeot 	 * configuration to be applied after these register writes.
1366a85cb24fSFrançois Tigeot 	 * This delay duration was derived empirically based on the
1367a85cb24fSFrançois Tigeot 	 * render_basic config but hopefully it covers the maximum
1368a85cb24fSFrançois Tigeot 	 * configuration latency.
1369a85cb24fSFrançois Tigeot 	 *
1370a85cb24fSFrançois Tigeot 	 * As a fallback, the checks in _append_oa_reports() to skip
1371a85cb24fSFrançois Tigeot 	 * invalid OA reports do also seem to work to discard reports
1372a85cb24fSFrançois Tigeot 	 * generated before this config has completed - albeit not
1373a85cb24fSFrançois Tigeot 	 * silently.
1374a85cb24fSFrançois Tigeot 	 *
1375a85cb24fSFrançois Tigeot 	 * Unfortunately this is essentially a magic number, since we
1376a85cb24fSFrançois Tigeot 	 * don't currently know of a reliable mechanism for predicting
1377a85cb24fSFrançois Tigeot 	 * how long the MUX config will take to apply and besides
1378a85cb24fSFrançois Tigeot 	 * seeing invalid reports we don't know of a reliable way to
1379a85cb24fSFrançois Tigeot 	 * explicitly check that the MUX config has landed.
1380a85cb24fSFrançois Tigeot 	 *
1381a85cb24fSFrançois Tigeot 	 * It's even possible we've mischaracterized the underlying
1382a85cb24fSFrançois Tigeot 	 * problem - it just seems like the simplest explanation why
1383a85cb24fSFrançois Tigeot 	 * a delay at this location would mitigate any invalid reports.
1384a85cb24fSFrançois Tigeot 	 */
1385a85cb24fSFrançois Tigeot 	usleep_range(15000, 20000);
1386a85cb24fSFrançois Tigeot 
1387*3f2dd94aSFrançois Tigeot 	config_oa_regs(dev_priv, oa_config->b_counter_regs,
1388*3f2dd94aSFrançois Tigeot 		       oa_config->b_counter_regs_len);
1389a85cb24fSFrançois Tigeot 
1390a85cb24fSFrançois Tigeot 	return 0;
1391a85cb24fSFrançois Tigeot }
1392a85cb24fSFrançois Tigeot 
/*
 * hsw_disable_metric_set - undo the Haswell metric set configuration
 * @dev_priv: device private
 *
 * Each step is a read-modify-write: clears the CS unit clock gate disable
 * bit in GEN6_UCGCTL1, re-enables DOP clock gating in GEN7_MISCCPCTL, and
 * clears GT_NOA_ENABLE in GDT_CHICKEN_BITS.
 *
 * NOTE(review): nothing in hsw_enable_metric_set() sets GT_NOA_ENABLE
 * directly; presumably it is set through the mux register list - confirm.
 */
1393a85cb24fSFrançois Tigeot static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
1394a85cb24fSFrançois Tigeot {
1395a85cb24fSFrançois Tigeot 	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
1396a85cb24fSFrançois Tigeot 				  ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
1397a85cb24fSFrançois Tigeot 	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
1398a85cb24fSFrançois Tigeot 				    GEN7_DOP_CLOCK_GATE_ENABLE));
1399a85cb24fSFrançois Tigeot 
1400a85cb24fSFrançois Tigeot 	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
1401a85cb24fSFrançois Tigeot 				      ~GT_NOA_ENABLE));
1402a85cb24fSFrançois Tigeot }
1403a85cb24fSFrançois Tigeot 
1404*3f2dd94aSFrançois Tigeot /*
1405*3f2dd94aSFrançois Tigeot  * NB: It must always remain pointer safe to run this even if the OA unit
1406*3f2dd94aSFrançois Tigeot  * has been disabled.
1407*3f2dd94aSFrançois Tigeot  *
1408*3f2dd94aSFrançois Tigeot  * It's fine to put out-of-date values into these per-context registers
1409*3f2dd94aSFrançois Tigeot  * in the case that the OA unit has been disabled.
 *
 * @ctx: context whose register state is being patched (used to reach i915)
 * @reg_state: u32 array holding the context's register state; entries are
 *             written as (mmio offset, value) pairs at the offsets recorded
 *             in perf.oa.ctx_oactxctrl_offset and perf.oa.ctx_flexeu0_offset
 * @oa_config: source of Flex EU values, or NULL to write 0 for every
 *             Flex EU register
 *
 * Writes the OACTXCONTROL pair (timer period, timer enable, counter resume)
 * followed by one pair per Flex EU register.
1410*3f2dd94aSFrançois Tigeot  */
1411*3f2dd94aSFrançois Tigeot static void gen8_update_reg_state_unlocked(struct i915_gem_context *ctx,
1412*3f2dd94aSFrançois Tigeot 					   u32 *reg_state,
1413*3f2dd94aSFrançois Tigeot 					   const struct i915_oa_config *oa_config)
1414a85cb24fSFrançois Tigeot {
1415*3f2dd94aSFrançois Tigeot 	struct drm_i915_private *dev_priv = ctx->i915;
1416*3f2dd94aSFrançois Tigeot 	u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
1417*3f2dd94aSFrançois Tigeot 	u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
1418*3f2dd94aSFrançois Tigeot 	/* The MMIO offsets for Flex EU registers aren't contiguous */
1419*3f2dd94aSFrançois Tigeot 	u32 flex_mmio[] = {
1420*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL0),
1421*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL1),
1422*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL2),
1423*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL3),
1424*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL4),
1425*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL5),
1426*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL6),
1427*3f2dd94aSFrançois Tigeot 	};
1428*3f2dd94aSFrançois Tigeot 	int i;
1429*3f2dd94aSFrançois Tigeot 
1430*3f2dd94aSFrançois Tigeot 	reg_state[ctx_oactxctrl] = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
1431*3f2dd94aSFrançois Tigeot 	reg_state[ctx_oactxctrl+1] = (dev_priv->perf.oa.period_exponent <<
1432*3f2dd94aSFrançois Tigeot 				      GEN8_OA_TIMER_PERIOD_SHIFT) |
1433*3f2dd94aSFrançois Tigeot 				     (dev_priv->perf.oa.periodic ?
1434*3f2dd94aSFrançois Tigeot 				      GEN8_OA_TIMER_ENABLE : 0) |
1435*3f2dd94aSFrançois Tigeot 				     GEN8_OA_COUNTER_RESUME;
1436*3f2dd94aSFrançois Tigeot 
1437*3f2dd94aSFrançois Tigeot 	for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) {
		/* Each Flex EU register occupies two consecutive dwords. */
1438*3f2dd94aSFrançois Tigeot 		u32 state_offset = ctx_flexeu0 + i * 2;
1439*3f2dd94aSFrançois Tigeot 		u32 mmio = flex_mmio[i];
1440*3f2dd94aSFrançois Tigeot 
1441*3f2dd94aSFrançois Tigeot 		/*
1442*3f2dd94aSFrançois Tigeot 		 * This arbitrary default will select the 'EU FPU0 Pipeline
1443*3f2dd94aSFrançois Tigeot 		 * Active' event. In the future it's anticipated that there
1444*3f2dd94aSFrançois Tigeot 		 * will be an explicit 'No Event' we can select, but not yet...
1445*3f2dd94aSFrançois Tigeot 		 */
1446*3f2dd94aSFrançois Tigeot 		u32 value = 0;
1447*3f2dd94aSFrançois Tigeot 
1448*3f2dd94aSFrançois Tigeot 		if (oa_config) {
1449*3f2dd94aSFrançois Tigeot 			u32 j;
1450*3f2dd94aSFrançois Tigeot 
			/* First entry matching this register's offset wins. */
1451*3f2dd94aSFrançois Tigeot 			for (j = 0; j < oa_config->flex_regs_len; j++) {
1452*3f2dd94aSFrançois Tigeot 				if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
1453*3f2dd94aSFrançois Tigeot 					value = oa_config->flex_regs[j].value;
1454*3f2dd94aSFrançois Tigeot 					break;
1455*3f2dd94aSFrançois Tigeot 				}
1456*3f2dd94aSFrançois Tigeot 			}
1457*3f2dd94aSFrançois Tigeot 		}
1458*3f2dd94aSFrançois Tigeot 
1459*3f2dd94aSFrançois Tigeot 		reg_state[state_offset] = mmio;
1460*3f2dd94aSFrançois Tigeot 		reg_state[state_offset+1] = value;
1461*3f2dd94aSFrançois Tigeot 	}
1462*3f2dd94aSFrançois Tigeot }
1463*3f2dd94aSFrançois Tigeot 
1464*3f2dd94aSFrançois Tigeot /*
1465*3f2dd94aSFrançois Tigeot  * Same as gen8_update_reg_state_unlocked only through the batchbuffer. This
1466*3f2dd94aSFrançois Tigeot  * is only used by the kernel context.
 *
 * @req: request whose ring receives the commands
 * @oa_config: source of Flex EU values, or NULL to write 0 for every
 *             Flex EU register
 *
 * Emits a single MI_LOAD_REGISTER_IMM covering OACTXCONTROL plus every
 * Flex EU register, followed by one MI_NOOP.
 *
 * Returns: 0 on success, or PTR_ERR() of a failed intel_ring_begin().
1467*3f2dd94aSFrançois Tigeot  */
1468*3f2dd94aSFrançois Tigeot static int gen8_emit_oa_config(struct drm_i915_gem_request *req,
1469*3f2dd94aSFrançois Tigeot 			       const struct i915_oa_config *oa_config)
1470*3f2dd94aSFrançois Tigeot {
1471*3f2dd94aSFrançois Tigeot 	struct drm_i915_private *dev_priv = req->i915;
1472*3f2dd94aSFrançois Tigeot 	/* The MMIO offsets for Flex EU registers aren't contiguous */
1473*3f2dd94aSFrançois Tigeot 	u32 flex_mmio[] = {
1474*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL0),
1475*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL1),
1476*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL2),
1477*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL3),
1478*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL4),
1479*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL5),
1480*3f2dd94aSFrançois Tigeot 		i915_mmio_reg_offset(EU_PERF_CNTL6),
1481*3f2dd94aSFrançois Tigeot 	};
1482*3f2dd94aSFrançois Tigeot 	u32 *cs;
1483*3f2dd94aSFrançois Tigeot 	int i;
1484*3f2dd94aSFrançois Tigeot 
	/*
	 * Dword budget: 1 (LRI header) + 2 (OACTXCONTROL offset/value)
	 * + 2 per Flex EU register + 1 (MI_NOOP).
	 */
1485*3f2dd94aSFrançois Tigeot 	cs = intel_ring_begin(req, ARRAY_SIZE(flex_mmio) * 2 + 4);
1486*3f2dd94aSFrançois Tigeot 	if (IS_ERR(cs))
1487*3f2dd94aSFrançois Tigeot 		return PTR_ERR(cs);
1488*3f2dd94aSFrançois Tigeot 
	/* One register/value pair per Flex EU register, plus OACTXCONTROL. */
1489*3f2dd94aSFrançois Tigeot 	*cs++ = MI_LOAD_REGISTER_IMM(ARRAY_SIZE(flex_mmio) + 1);
1490*3f2dd94aSFrançois Tigeot 
1491*3f2dd94aSFrançois Tigeot 	*cs++ = i915_mmio_reg_offset(GEN8_OACTXCONTROL);
1492*3f2dd94aSFrançois Tigeot 	*cs++ = (dev_priv->perf.oa.period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
1493*3f2dd94aSFrançois Tigeot 		(dev_priv->perf.oa.periodic ? GEN8_OA_TIMER_ENABLE : 0) |
1494*3f2dd94aSFrançois Tigeot 		GEN8_OA_COUNTER_RESUME;
1495*3f2dd94aSFrançois Tigeot 
1496*3f2dd94aSFrançois Tigeot 	for (i = 0; i < ARRAY_SIZE(flex_mmio); i++) {
1497*3f2dd94aSFrançois Tigeot 		u32 mmio = flex_mmio[i];
1498*3f2dd94aSFrançois Tigeot 
1499*3f2dd94aSFrançois Tigeot 		/*
1500*3f2dd94aSFrançois Tigeot 		 * This arbitrary default will select the 'EU FPU0 Pipeline
1501*3f2dd94aSFrançois Tigeot 		 * Active' event. In the future it's anticipated that there
1502*3f2dd94aSFrançois Tigeot 		 * will be an explicit 'No Event' we can select, but not
1503*3f2dd94aSFrançois Tigeot 		 * yet...
1504*3f2dd94aSFrançois Tigeot 		 */
1505*3f2dd94aSFrançois Tigeot 		u32 value = 0;
1506*3f2dd94aSFrançois Tigeot 
1507*3f2dd94aSFrançois Tigeot 		if (oa_config) {
1508*3f2dd94aSFrançois Tigeot 			u32 j;
1509*3f2dd94aSFrançois Tigeot 
			/* First entry matching this register's offset wins. */
1510*3f2dd94aSFrançois Tigeot 			for (j = 0; j < oa_config->flex_regs_len; j++) {
1511*3f2dd94aSFrançois Tigeot 				if (i915_mmio_reg_offset(oa_config->flex_regs[j].addr) == mmio) {
1512*3f2dd94aSFrançois Tigeot 					value = oa_config->flex_regs[j].value;
1513*3f2dd94aSFrançois Tigeot 					break;
1514*3f2dd94aSFrançois Tigeot 				}
1515*3f2dd94aSFrançois Tigeot 			}
1516*3f2dd94aSFrançois Tigeot 		}
1517*3f2dd94aSFrançois Tigeot 
1518*3f2dd94aSFrançois Tigeot 		*cs++ = mmio;
1519*3f2dd94aSFrançois Tigeot 		*cs++ = value;
1520*3f2dd94aSFrançois Tigeot 	}
1521*3f2dd94aSFrançois Tigeot 
	/* Presumably pads the emission to an even dword count - TODO confirm. */
1522*3f2dd94aSFrançois Tigeot 	*cs++ = MI_NOOP;
1523*3f2dd94aSFrançois Tigeot 	intel_ring_advance(req, cs);
1524*3f2dd94aSFrançois Tigeot 
1525*3f2dd94aSFrançois Tigeot 	return 0;
1526*3f2dd94aSFrančois Tigeot }
1527*3f2dd94aSFrançois Tigeot 
1528*3f2dd94aSFrançois Tigeot static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_priv,
1529*3f2dd94aSFrançois Tigeot 						 const struct i915_oa_config *oa_config)
1530*3f2dd94aSFrançois Tigeot {
1531*3f2dd94aSFrançois Tigeot 	struct intel_engine_cs *engine = dev_priv->engine[RCS];
1532*3f2dd94aSFrançois Tigeot 	struct i915_gem_timeline *timeline;
1533*3f2dd94aSFrançois Tigeot 	struct drm_i915_gem_request *req;
1534*3f2dd94aSFrançois Tigeot 	int ret;
1535*3f2dd94aSFrançois Tigeot 
1536*3f2dd94aSFrançois Tigeot 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
1537*3f2dd94aSFrançois Tigeot 
1538*3f2dd94aSFrançois Tigeot 	i915_gem_retire_requests(dev_priv);
1539*3f2dd94aSFrançois Tigeot 
1540*3f2dd94aSFrançois Tigeot 	req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
1541*3f2dd94aSFrançois Tigeot 	if (IS_ERR(req))
1542*3f2dd94aSFrançois Tigeot 		return PTR_ERR(req);
1543*3f2dd94aSFrançois Tigeot 
1544*3f2dd94aSFrançois Tigeot 	ret = gen8_emit_oa_config(req, oa_config);
1545*3f2dd94aSFrançois Tigeot 	if (ret) {
1546*3f2dd94aSFrançois Tigeot 		i915_add_request(req);
1547*3f2dd94aSFrançois Tigeot 		return ret;
1548*3f2dd94aSFrançois Tigeot 	}
1549*3f2dd94aSFrançois Tigeot 
1550*3f2dd94aSFrançois Tigeot 	/* Queue this switch after all other activity */
1551*3f2dd94aSFrançois Tigeot 	list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
1552*3f2dd94aSFrançois Tigeot 		struct drm_i915_gem_request *prev;
1553*3f2dd94aSFrançois Tigeot 		struct intel_timeline *tl;
1554*3f2dd94aSFrançois Tigeot 
1555*3f2dd94aSFrançois Tigeot 		tl = &timeline->engine[engine->id];
1556*3f2dd94aSFrançois Tigeot 		prev = i915_gem_active_raw(&tl->last_request,
1557*3f2dd94aSFrançois Tigeot 					   &dev_priv->drm.struct_mutex);
1558*3f2dd94aSFrançois Tigeot 		if (prev)
1559*3f2dd94aSFrançois Tigeot 			i915_sw_fence_await_sw_fence_gfp(&req->submit,
1560*3f2dd94aSFrançois Tigeot 							 &prev->submit,
1561*3f2dd94aSFrançois Tigeot 							 GFP_KERNEL);
1562*3f2dd94aSFrançois Tigeot 	}
1563*3f2dd94aSFrançois Tigeot 
1564*3f2dd94aSFrançois Tigeot 	ret = i915_switch_context(req);
1565*3f2dd94aSFrançois Tigeot 	i915_add_request(req);
1566*3f2dd94aSFrançois Tigeot 
1567*3f2dd94aSFrançois Tigeot 	return ret;
1568*3f2dd94aSFrançois Tigeot }
1569*3f2dd94aSFrançois Tigeot 
1570*3f2dd94aSFrançois Tigeot /*
1571*3f2dd94aSFrançois Tigeot  * Manages updating the per-context aspects of the OA stream
1572*3f2dd94aSFrançois Tigeot  * configuration across all contexts.
1573*3f2dd94aSFrançois Tigeot  *
1574*3f2dd94aSFrançois Tigeot  * The awkward consideration here is that OACTXCONTROL controls the
1575*3f2dd94aSFrançois Tigeot  * exponent for periodic sampling which is primarily used for system
1576*3f2dd94aSFrançois Tigeot  * wide profiling where we'd like a consistent sampling period even in
1577*3f2dd94aSFrançois Tigeot  * the face of context switches.
1578*3f2dd94aSFrançois Tigeot  *
1579*3f2dd94aSFrançois Tigeot  * Our approach of updating the register state context (as opposed to
1580*3f2dd94aSFrançois Tigeot  * say using a workaround batch buffer) ensures that the hardware
1581*3f2dd94aSFrançois Tigeot  * won't automatically reload an out-of-date timer exponent even
1582*3f2dd94aSFrançois Tigeot  * transiently before a WA BB could be parsed.
1583*3f2dd94aSFrançois Tigeot  *
1584*3f2dd94aSFrançois Tigeot  * This function needs to:
1585*3f2dd94aSFrançois Tigeot  * - Ensure the currently running context's per-context OA state is
1586*3f2dd94aSFrançois Tigeot  *   updated
1587*3f2dd94aSFrançois Tigeot  * - Ensure that all existing contexts will have the correct per-context
1588*3f2dd94aSFrançois Tigeot  *   OA state if they are scheduled for use.
1589*3f2dd94aSFrançois Tigeot  * - Ensure any new contexts will be initialized with the correct
1590*3f2dd94aSFrançois Tigeot  *   per-context OA state.
1591*3f2dd94aSFrançois Tigeot  *
1592*3f2dd94aSFrançois Tigeot  * Note: it's only the RCS/Render context that has any OA state.
1593*3f2dd94aSFrançois Tigeot  */
1594*3f2dd94aSFrançois Tigeot static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv,
1595*3f2dd94aSFrançois Tigeot 				       const struct i915_oa_config *oa_config)
1596*3f2dd94aSFrançois Tigeot {
1597*3f2dd94aSFrançois Tigeot 	struct i915_gem_context *ctx;
1598*3f2dd94aSFrançois Tigeot 	int ret;
1599*3f2dd94aSFrançois Tigeot 	unsigned int wait_flags = I915_WAIT_LOCKED;
1600*3f2dd94aSFrançois Tigeot 
1601*3f2dd94aSFrançois Tigeot 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
1602*3f2dd94aSFrançois Tigeot 
1603*3f2dd94aSFrançois Tigeot 	/* Switch away from any user context. */
1604*3f2dd94aSFrançois Tigeot 	ret = gen8_switch_to_updated_kernel_context(dev_priv, oa_config);
1605*3f2dd94aSFrançois Tigeot 	if (ret)
1606*3f2dd94aSFrançois Tigeot 		goto out;
1607*3f2dd94aSFrançois Tigeot 
1608*3f2dd94aSFrançois Tigeot 	/*
1609*3f2dd94aSFrançois Tigeot 	 * The OA register config is setup through the context image. This image
1610*3f2dd94aSFrançois Tigeot 	 * might be written to by the GPU on context switch (in particular on
1611*3f2dd94aSFrançois Tigeot 	 * lite-restore). This means we can't safely update a context's image,
1612*3f2dd94aSFrançois Tigeot 	 * if this context is scheduled/submitted to run on the GPU.
1613*3f2dd94aSFrançois Tigeot 	 *
1614*3f2dd94aSFrançois Tigeot 	 * We could emit the OA register config through the batch buffer but
1615*3f2dd94aSFrançois Tigeot 	 * this might leave small interval of time where the OA unit is
1616*3f2dd94aSFrançois Tigeot 	 * configured at an invalid sampling period.
1617*3f2dd94aSFrançois Tigeot 	 *
1618*3f2dd94aSFrançois Tigeot 	 * So far the best way to work around this issue seems to be draining
1619*3f2dd94aSFrançois Tigeot 	 * the GPU from any submitted work.
1620*3f2dd94aSFrançois Tigeot 	 */
1621*3f2dd94aSFrançois Tigeot 	ret = i915_gem_wait_for_idle(dev_priv, wait_flags);
1622*3f2dd94aSFrançois Tigeot 	if (ret)
1623*3f2dd94aSFrançois Tigeot 		goto out;
1624*3f2dd94aSFrançois Tigeot 
1625*3f2dd94aSFrançois Tigeot 	/* Update all contexts now that we've stalled the submission. */
1626*3f2dd94aSFrançois Tigeot 	list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
1627*3f2dd94aSFrançois Tigeot 		struct intel_context *ce = &ctx->engine[RCS];
1628*3f2dd94aSFrançois Tigeot 		u32 *regs;
1629*3f2dd94aSFrançois Tigeot 
1630*3f2dd94aSFrançois Tigeot 		/* OA settings will be set upon first use */
1631*3f2dd94aSFrançois Tigeot 		if (!ce->state)
1632*3f2dd94aSFrançois Tigeot 			continue;
1633*3f2dd94aSFrançois Tigeot 
1634*3f2dd94aSFrançois Tigeot 		regs = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB);
1635*3f2dd94aSFrançois Tigeot 		if (IS_ERR(regs)) {
1636*3f2dd94aSFrançois Tigeot 			ret = PTR_ERR(regs);
1637*3f2dd94aSFrançois Tigeot 			goto out;
1638*3f2dd94aSFrançois Tigeot 		}
1639*3f2dd94aSFrançois Tigeot 
1640*3f2dd94aSFrançois Tigeot 		ce->state->obj->mm.dirty = true;
1641*3f2dd94aSFrançois Tigeot 		regs += LRC_STATE_PN * PAGE_SIZE / sizeof(*regs);
1642*3f2dd94aSFrançois Tigeot 
1643*3f2dd94aSFrançois Tigeot 		gen8_update_reg_state_unlocked(ctx, regs, oa_config);
1644*3f2dd94aSFrançois Tigeot 
1645*3f2dd94aSFrançois Tigeot 		i915_gem_object_unpin_map(ce->state->obj);
1646*3f2dd94aSFrançois Tigeot 	}
1647*3f2dd94aSFrançois Tigeot 
1648*3f2dd94aSFrançois Tigeot  out:
1649*3f2dd94aSFrançois Tigeot 	return ret;
1650*3f2dd94aSFrançois Tigeot }
1651*3f2dd94aSFrançois Tigeot 
1652*3f2dd94aSFrançois Tigeot static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
1653*3f2dd94aSFrançois Tigeot 				  const struct i915_oa_config *oa_config)
1654*3f2dd94aSFrançois Tigeot {
1655*3f2dd94aSFrançois Tigeot 	int ret;
1656*3f2dd94aSFrançois Tigeot 
1657*3f2dd94aSFrançois Tigeot 	/*
1658*3f2dd94aSFrançois Tigeot 	 * We disable slice/unslice clock ratio change reports on SKL since
1659*3f2dd94aSFrançois Tigeot 	 * they are too noisy. The HW generates a lot of redundant reports
1660*3f2dd94aSFrançois Tigeot 	 * where the ratio hasn't really changed causing a lot of redundant
1661*3f2dd94aSFrançois Tigeot 	 * work to processes and increasing the chances we'll hit buffer
1662*3f2dd94aSFrançois Tigeot 	 * overruns.
1663*3f2dd94aSFrançois Tigeot 	 *
1664*3f2dd94aSFrançois Tigeot 	 * Although we don't currently use the 'disable overrun' OABUFFER
1665*3f2dd94aSFrançois Tigeot 	 * feature it's worth noting that clock ratio reports have to be
1666*3f2dd94aSFrançois Tigeot 	 * disabled before considering to use that feature since the HW doesn't
1667*3f2dd94aSFrançois Tigeot 	 * correctly block these reports.
1668*3f2dd94aSFrançois Tigeot 	 *
1669*3f2dd94aSFrançois Tigeot 	 * Currently none of the high-level metrics we have depend on knowing
1670*3f2dd94aSFrançois Tigeot 	 * this ratio to normalize.
1671*3f2dd94aSFrançois Tigeot 	 *
1672*3f2dd94aSFrançois Tigeot 	 * Note: This register is not power context saved and restored, but
1673*3f2dd94aSFrançois Tigeot 	 * that's OK considering that we disable RC6 while the OA unit is
1674*3f2dd94aSFrançois Tigeot 	 * enabled.
1675*3f2dd94aSFrançois Tigeot 	 *
1676*3f2dd94aSFrançois Tigeot 	 * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to
1677*3f2dd94aSFrançois Tigeot 	 * be read back from automatically triggered reports, as part of the
1678*3f2dd94aSFrançois Tigeot 	 * RPT_ID field.
1679*3f2dd94aSFrançois Tigeot 	 */
1680*3f2dd94aSFrançois Tigeot 	if (IS_GEN9(dev_priv)) {
1681*3f2dd94aSFrançois Tigeot 		I915_WRITE(GEN8_OA_DEBUG,
1682*3f2dd94aSFrançois Tigeot 			   _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
1683*3f2dd94aSFrançois Tigeot 					      GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
1684*3f2dd94aSFrançois Tigeot 	}
1685*3f2dd94aSFrançois Tigeot 
1686*3f2dd94aSFrançois Tigeot 	/*
1687*3f2dd94aSFrançois Tigeot 	 * Update all contexts prior writing the mux configurations as we need
1688*3f2dd94aSFrançois Tigeot 	 * to make sure all slices/subslices are ON before writing to NOA
1689*3f2dd94aSFrançois Tigeot 	 * registers.
1690*3f2dd94aSFrançois Tigeot 	 */
1691*3f2dd94aSFrançois Tigeot 	ret = gen8_configure_all_contexts(dev_priv, oa_config);
1692*3f2dd94aSFrançois Tigeot 	if (ret)
1693*3f2dd94aSFrançois Tigeot 		return ret;
1694*3f2dd94aSFrançois Tigeot 
1695*3f2dd94aSFrançois Tigeot 	config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len);
1696*3f2dd94aSFrançois Tigeot 
1697*3f2dd94aSFrançois Tigeot 	config_oa_regs(dev_priv, oa_config->b_counter_regs,
1698*3f2dd94aSFrançois Tigeot 		       oa_config->b_counter_regs_len);
1699*3f2dd94aSFrançois Tigeot 
1700*3f2dd94aSFrançois Tigeot 	return 0;
1701*3f2dd94aSFrançois Tigeot }
1702*3f2dd94aSFrançois Tigeot 
1703*3f2dd94aSFrançois Tigeot static void gen8_disable_metric_set(struct drm_i915_private *dev_priv)
1704*3f2dd94aSFrançois Tigeot {
1705*3f2dd94aSFrançois Tigeot 	/* Reset all contexts' slices/subslices configurations. */
1706*3f2dd94aSFrançois Tigeot 	gen8_configure_all_contexts(dev_priv, NULL);
1707*3f2dd94aSFrançois Tigeot 
1708*3f2dd94aSFrançois Tigeot 	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
1709*3f2dd94aSFrançois Tigeot 				      ~GT_NOA_ENABLE));
1710*3f2dd94aSFrançois Tigeot 
1711*3f2dd94aSFrançois Tigeot }
1712*3f2dd94aSFrançois Tigeot 
1713*3f2dd94aSFrançois Tigeot static void gen7_oa_enable(struct drm_i915_private *dev_priv)
1714*3f2dd94aSFrançois Tigeot {
1715*3f2dd94aSFrançois Tigeot 	/*
1716*3f2dd94aSFrançois Tigeot 	 * Reset buf pointers so we don't forward reports from before now.
1717*3f2dd94aSFrançois Tigeot 	 *
1718*3f2dd94aSFrançois Tigeot 	 * Think carefully if considering trying to avoid this, since it
1719*3f2dd94aSFrançois Tigeot 	 * also ensures status flags and the buffer itself are cleared
1720*3f2dd94aSFrançois Tigeot 	 * in error paths, and we have checks for invalid reports based
1721*3f2dd94aSFrançois Tigeot 	 * on the assumption that certain fields are written to zeroed
1722*3f2dd94aSFrançois Tigeot 	 * memory which this helps maintains.
1723*3f2dd94aSFrançois Tigeot 	 */
1724*3f2dd94aSFrançois Tigeot 	gen7_init_oa_buffer(dev_priv);
1725a85cb24fSFrançois Tigeot 
1726a85cb24fSFrançois Tigeot 	if (dev_priv->perf.oa.exclusive_stream->enabled) {
1727a85cb24fSFrançois Tigeot 		struct i915_gem_context *ctx =
1728a85cb24fSFrançois Tigeot 			dev_priv->perf.oa.exclusive_stream->ctx;
1729a85cb24fSFrançois Tigeot 		u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;
1730a85cb24fSFrançois Tigeot 
1731a85cb24fSFrançois Tigeot 		bool periodic = dev_priv->perf.oa.periodic;
1732a85cb24fSFrançois Tigeot 		u32 period_exponent = dev_priv->perf.oa.period_exponent;
1733a85cb24fSFrançois Tigeot 		u32 report_format = dev_priv->perf.oa.oa_buffer.format;
1734a85cb24fSFrançois Tigeot 
1735a85cb24fSFrançois Tigeot 		I915_WRITE(GEN7_OACONTROL,
1736a85cb24fSFrançois Tigeot 			   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
1737a85cb24fSFrançois Tigeot 			   (period_exponent <<
1738a85cb24fSFrançois Tigeot 			    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
1739a85cb24fSFrançois Tigeot 			   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
1740a85cb24fSFrançois Tigeot 			   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
1741a85cb24fSFrançois Tigeot 			   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
1742a85cb24fSFrançois Tigeot 			   GEN7_OACONTROL_ENABLE);
1743a85cb24fSFrançois Tigeot 	} else
1744a85cb24fSFrançois Tigeot 		I915_WRITE(GEN7_OACONTROL, 0);
1745a85cb24fSFrançois Tigeot }
1746a85cb24fSFrançois Tigeot 
1747*3f2dd94aSFrançois Tigeot static void gen8_oa_enable(struct drm_i915_private *dev_priv)
1748a85cb24fSFrançois Tigeot {
1749*3f2dd94aSFrançois Tigeot 	u32 report_format = dev_priv->perf.oa.oa_buffer.format;
1750a85cb24fSFrançois Tigeot 
1751*3f2dd94aSFrançois Tigeot 	/*
1752*3f2dd94aSFrançois Tigeot 	 * Reset buf pointers so we don't forward reports from before now.
1753a85cb24fSFrançois Tigeot 	 *
1754a85cb24fSFrançois Tigeot 	 * Think carefully if considering trying to avoid this, since it
1755a85cb24fSFrançois Tigeot 	 * also ensures status flags and the buffer itself are cleared
1756a85cb24fSFrançois Tigeot 	 * in error paths, and we have checks for invalid reports based
1757a85cb24fSFrançois Tigeot 	 * on the assumption that certain fields are written to zeroed
1758a85cb24fSFrançois Tigeot 	 * memory which this helps maintains.
1759a85cb24fSFrançois Tigeot 	 */
1760*3f2dd94aSFrançois Tigeot 	gen8_init_oa_buffer(dev_priv);
1761a85cb24fSFrançois Tigeot 
1762*3f2dd94aSFrançois Tigeot 	/*
1763*3f2dd94aSFrançois Tigeot 	 * Note: we don't rely on the hardware to perform single context
1764*3f2dd94aSFrançois Tigeot 	 * filtering and instead filter on the cpu based on the context-id
1765*3f2dd94aSFrançois Tigeot 	 * field of reports
1766*3f2dd94aSFrançois Tigeot 	 */
1767*3f2dd94aSFrançois Tigeot 	I915_WRITE(GEN8_OACONTROL, (report_format <<
1768*3f2dd94aSFrançois Tigeot 				    GEN8_OA_REPORT_FORMAT_SHIFT) |
1769*3f2dd94aSFrançois Tigeot 				   GEN8_OA_COUNTER_ENABLE);
1770a85cb24fSFrançois Tigeot }
1771a85cb24fSFrançois Tigeot 
1772a85cb24fSFrançois Tigeot /**
1773a85cb24fSFrançois Tigeot  * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
1774a85cb24fSFrançois Tigeot  * @stream: An i915 perf stream opened for OA metrics
1775a85cb24fSFrançois Tigeot  *
1776a85cb24fSFrançois Tigeot  * [Re]enables hardware periodic sampling according to the period configured
1777a85cb24fSFrançois Tigeot  * when opening the stream. This also starts a hrtimer that will periodically
1778a85cb24fSFrançois Tigeot  * check for data in the circular OA buffer for notifying userspace (e.g.
1779a85cb24fSFrançois Tigeot  * during a read() or poll()).
1780a85cb24fSFrançois Tigeot  */
1781a85cb24fSFrançois Tigeot static void i915_oa_stream_enable(struct i915_perf_stream *stream)
1782a85cb24fSFrançois Tigeot {
1783a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
1784a85cb24fSFrançois Tigeot 
1785a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.ops.oa_enable(dev_priv);
1786a85cb24fSFrançois Tigeot 
1787a85cb24fSFrançois Tigeot 	if (dev_priv->perf.oa.periodic)
1788a85cb24fSFrançois Tigeot 		hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
1789a85cb24fSFrançois Tigeot 			      ns_to_ktime(POLL_PERIOD),
1790a85cb24fSFrançois Tigeot 			      HRTIMER_MODE_REL_PINNED);
1791a85cb24fSFrançois Tigeot }
1792a85cb24fSFrançois Tigeot 
1793a85cb24fSFrançois Tigeot static void gen7_oa_disable(struct drm_i915_private *dev_priv)
1794a85cb24fSFrançois Tigeot {
1795a85cb24fSFrançois Tigeot 	I915_WRITE(GEN7_OACONTROL, 0);
1796a85cb24fSFrançois Tigeot }
1797a85cb24fSFrançois Tigeot 
1798*3f2dd94aSFrançois Tigeot static void gen8_oa_disable(struct drm_i915_private *dev_priv)
1799*3f2dd94aSFrançois Tigeot {
1800*3f2dd94aSFrançois Tigeot 	I915_WRITE(GEN8_OACONTROL, 0);
1801*3f2dd94aSFrançois Tigeot }
1802*3f2dd94aSFrançois Tigeot 
1803a85cb24fSFrançois Tigeot /**
1804a85cb24fSFrançois Tigeot  * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream
1805a85cb24fSFrançois Tigeot  * @stream: An i915 perf stream opened for OA metrics
1806a85cb24fSFrançois Tigeot  *
1807a85cb24fSFrançois Tigeot  * Stops the OA unit from periodically writing counter reports into the
1808a85cb24fSFrançois Tigeot  * circular OA buffer. This also stops the hrtimer that periodically checks for
1809a85cb24fSFrançois Tigeot  * data in the circular OA buffer, for notifying userspace.
1810a85cb24fSFrançois Tigeot  */
1811a85cb24fSFrançois Tigeot static void i915_oa_stream_disable(struct i915_perf_stream *stream)
1812a85cb24fSFrançois Tigeot {
1813a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
1814a85cb24fSFrançois Tigeot 
1815a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.ops.oa_disable(dev_priv);
1816a85cb24fSFrançois Tigeot 
1817a85cb24fSFrançois Tigeot 	if (dev_priv->perf.oa.periodic)
1818a85cb24fSFrançois Tigeot 		hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
1819a85cb24fSFrançois Tigeot }
1820a85cb24fSFrançois Tigeot 
1821a85cb24fSFrançois Tigeot static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
1822a85cb24fSFrançois Tigeot {
1823a85cb24fSFrançois Tigeot 	return div_u64(1000000000ULL * (2ULL << exponent),
1824a85cb24fSFrançois Tigeot 		       dev_priv->perf.oa.timestamp_frequency);
1825a85cb24fSFrançois Tigeot }
1826a85cb24fSFrançois Tigeot 
1827a85cb24fSFrançois Tigeot static const struct i915_perf_stream_ops i915_oa_stream_ops = {
1828a85cb24fSFrançois Tigeot 	.destroy = i915_oa_stream_destroy,
1829a85cb24fSFrançois Tigeot 	.enable = i915_oa_stream_enable,
1830a85cb24fSFrançois Tigeot 	.disable = i915_oa_stream_disable,
1831a85cb24fSFrançois Tigeot 	.wait_unlocked = i915_oa_wait_unlocked,
1832a85cb24fSFrançois Tigeot 	.poll_wait = i915_oa_poll_wait,
1833a85cb24fSFrançois Tigeot 	.read = i915_oa_read,
1834a85cb24fSFrançois Tigeot };
1835a85cb24fSFrançois Tigeot 
1836a85cb24fSFrançois Tigeot /**
1837a85cb24fSFrançois Tigeot  * i915_oa_stream_init - validate combined props for OA stream and init
1838a85cb24fSFrançois Tigeot  * @stream: An i915 perf stream
1839a85cb24fSFrançois Tigeot  * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
1840a85cb24fSFrançois Tigeot  * @props: The property state that configures stream (individually validated)
1841a85cb24fSFrançois Tigeot  *
1842a85cb24fSFrançois Tigeot  * While read_properties_unlocked() validates properties in isolation it
1843a85cb24fSFrançois Tigeot  * doesn't ensure that the combination necessarily makes sense.
1844a85cb24fSFrançois Tigeot  *
1845a85cb24fSFrançois Tigeot  * At this point it has been determined that userspace wants a stream of
1846a85cb24fSFrançois Tigeot  * OA metrics, but still we need to further validate the combined
1847a85cb24fSFrançois Tigeot  * properties are OK.
1848a85cb24fSFrançois Tigeot  *
1849a85cb24fSFrançois Tigeot  * If the configuration makes sense then we can allocate memory for
1850a85cb24fSFrançois Tigeot  * a circular OA buffer and apply the requested metric set configuration.
1851a85cb24fSFrançois Tigeot  *
1852a85cb24fSFrançois Tigeot  * Returns: zero on success or a negative error code.
1853a85cb24fSFrançois Tigeot  */
1854a85cb24fSFrançois Tigeot static int i915_oa_stream_init(struct i915_perf_stream *stream,
1855a85cb24fSFrançois Tigeot 			       struct drm_i915_perf_open_param *param,
1856a85cb24fSFrançois Tigeot 			       struct perf_open_properties *props)
1857a85cb24fSFrançois Tigeot {
1858a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
1859a85cb24fSFrançois Tigeot 	int format_size;
1860a85cb24fSFrançois Tigeot 	int ret;
1861a85cb24fSFrançois Tigeot 
1862a85cb24fSFrançois Tigeot 	/* If the sysfs metrics/ directory wasn't registered for some
1863a85cb24fSFrançois Tigeot 	 * reason then don't let userspace try their luck with config
1864a85cb24fSFrançois Tigeot 	 * IDs
1865a85cb24fSFrançois Tigeot 	 */
1866a85cb24fSFrançois Tigeot 	if (!dev_priv->perf.metrics_kobj) {
1867a85cb24fSFrançois Tigeot 		DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
1868a85cb24fSFrançois Tigeot 		return -EINVAL;
1869a85cb24fSFrançois Tigeot 	}
1870a85cb24fSFrançois Tigeot 
1871a85cb24fSFrançois Tigeot 	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
1872a85cb24fSFrançois Tigeot 		DRM_DEBUG("Only OA report sampling supported\n");
1873a85cb24fSFrançois Tigeot 		return -EINVAL;
1874a85cb24fSFrançois Tigeot 	}
1875a85cb24fSFrançois Tigeot 
1876a85cb24fSFrançois Tigeot 	if (!dev_priv->perf.oa.ops.init_oa_buffer) {
1877a85cb24fSFrançois Tigeot 		DRM_DEBUG("OA unit not supported\n");
1878a85cb24fSFrançois Tigeot 		return -ENODEV;
1879a85cb24fSFrançois Tigeot 	}
1880a85cb24fSFrançois Tigeot 
1881a85cb24fSFrançois Tigeot 	/* To avoid the complexity of having to accurately filter
1882a85cb24fSFrançois Tigeot 	 * counter reports and marshal to the appropriate client
1883a85cb24fSFrançois Tigeot 	 * we currently only allow exclusive access
1884a85cb24fSFrançois Tigeot 	 */
1885a85cb24fSFrançois Tigeot 	if (dev_priv->perf.oa.exclusive_stream) {
1886a85cb24fSFrançois Tigeot 		DRM_DEBUG("OA unit already in use\n");
1887a85cb24fSFrançois Tigeot 		return -EBUSY;
1888a85cb24fSFrançois Tigeot 	}
1889a85cb24fSFrançois Tigeot 
1890a85cb24fSFrançois Tigeot 	if (!props->oa_format) {
1891a85cb24fSFrançois Tigeot 		DRM_DEBUG("OA report format not specified\n");
1892a85cb24fSFrançois Tigeot 		return -EINVAL;
1893a85cb24fSFrançois Tigeot 	}
1894a85cb24fSFrançois Tigeot 
1895a85cb24fSFrançois Tigeot 	stream->sample_size = sizeof(struct drm_i915_perf_record_header);
1896a85cb24fSFrançois Tigeot 
1897a85cb24fSFrançois Tigeot 	format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
1898a85cb24fSFrançois Tigeot 
1899a85cb24fSFrançois Tigeot 	stream->sample_flags |= SAMPLE_OA_REPORT;
1900a85cb24fSFrançois Tigeot 	stream->sample_size += format_size;
1901a85cb24fSFrançois Tigeot 
1902a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.oa_buffer.format_size = format_size;
1903a85cb24fSFrançois Tigeot 	if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
1904a85cb24fSFrançois Tigeot 		return -EINVAL;
1905a85cb24fSFrançois Tigeot 
1906a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.oa_buffer.format =
1907a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.oa_formats[props->oa_format].format;
1908a85cb24fSFrançois Tigeot 
1909a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.periodic = props->oa_periodic;
1910a85cb24fSFrançois Tigeot 	if (dev_priv->perf.oa.periodic) {
1911a85cb24fSFrançois Tigeot 		u32 tail;
1912a85cb24fSFrançois Tigeot 
1913a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.period_exponent = props->oa_period_exponent;
1914a85cb24fSFrançois Tigeot 
1915a85cb24fSFrançois Tigeot 		/* See comment for OA_TAIL_MARGIN_NSEC for details
1916a85cb24fSFrançois Tigeot 		 * about this tail_margin...
1917a85cb24fSFrançois Tigeot 		 */
1918a85cb24fSFrançois Tigeot 		tail = div64_u64(OA_TAIL_MARGIN_NSEC,
1919a85cb24fSFrançois Tigeot 				 oa_exponent_to_ns(dev_priv,
1920a85cb24fSFrançois Tigeot 						   props->oa_period_exponent));
1921a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.tail_margin = (tail + 1) * format_size;
1922a85cb24fSFrançois Tigeot 	}
1923a85cb24fSFrançois Tigeot 
1924a85cb24fSFrançois Tigeot 	if (stream->ctx) {
1925a85cb24fSFrançois Tigeot 		ret = oa_get_render_ctx_id(stream);
1926a85cb24fSFrançois Tigeot 		if (ret)
1927a85cb24fSFrançois Tigeot 			return ret;
1928a85cb24fSFrançois Tigeot 	}
1929a85cb24fSFrançois Tigeot 
1930*3f2dd94aSFrançois Tigeot 	ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config);
1931*3f2dd94aSFrançois Tigeot 	if (ret)
1932*3f2dd94aSFrançois Tigeot 		goto err_config;
1933*3f2dd94aSFrançois Tigeot 
1934a85cb24fSFrançois Tigeot 	/* PRM - observability performance counters:
1935a85cb24fSFrançois Tigeot 	 *
1936a85cb24fSFrançois Tigeot 	 *   OACONTROL, performance counter enable, note:
1937a85cb24fSFrançois Tigeot 	 *
1938a85cb24fSFrançois Tigeot 	 *   "When this bit is set, in order to have coherent counts,
1939a85cb24fSFrançois Tigeot 	 *   RC6 power state and trunk clock gating must be disabled.
1940a85cb24fSFrançois Tigeot 	 *   This can be achieved by programming MMIO registers as
1941a85cb24fSFrançois Tigeot 	 *   0xA094=0 and 0xA090[31]=1"
1942a85cb24fSFrançois Tigeot 	 *
1943a85cb24fSFrançois Tigeot 	 *   In our case we are expecting that taking pm + FORCEWAKE
1944a85cb24fSFrançois Tigeot 	 *   references will effectively disable RC6.
1945a85cb24fSFrançois Tigeot 	 */
1946a85cb24fSFrançois Tigeot 	intel_runtime_pm_get(dev_priv);
1947a85cb24fSFrançois Tigeot 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
1948a85cb24fSFrançois Tigeot 
1949a85cb24fSFrançois Tigeot 	ret = alloc_oa_buffer(dev_priv);
1950a85cb24fSFrançois Tigeot 	if (ret)
1951a85cb24fSFrançois Tigeot 		goto err_oa_buf_alloc;
1952a85cb24fSFrançois Tigeot 
1953*3f2dd94aSFrançois Tigeot 	ret = alloc_oa_buffer(dev_priv);
1954*3f2dd94aSFrançois Tigeot 	if (ret)
1955*3f2dd94aSFrançois Tigeot 		goto err_oa_buf_alloc;
1956*3f2dd94aSFrançois Tigeot 
1957*3f2dd94aSFrançois Tigeot 	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
1958*3f2dd94aSFrançois Tigeot 	if (ret)
1959*3f2dd94aSFrançois Tigeot 		goto err_lock;
1960*3f2dd94aSFrançois Tigeot 
1961*3f2dd94aSFrançois Tigeot 	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv,
1962*3f2dd94aSFrançois Tigeot 						      stream->oa_config);
1963a85cb24fSFrançois Tigeot 	if (ret)
1964a85cb24fSFrançois Tigeot 		goto err_enable;
1965a85cb24fSFrançois Tigeot 
1966a85cb24fSFrançois Tigeot 	stream->ops = &i915_oa_stream_ops;
1967a85cb24fSFrançois Tigeot 
1968a85cb24fSFrançois Tigeot 	dev_priv->perf.oa.exclusive_stream = stream;
1969a85cb24fSFrançois Tigeot 
1970*3f2dd94aSFrançois Tigeot 	mutex_unlock(&dev_priv->drm.struct_mutex);
1971*3f2dd94aSFrançois Tigeot 
1972a85cb24fSFrançois Tigeot 	return 0;
1973a85cb24fSFrançois Tigeot 
1974a85cb24fSFrançois Tigeot err_enable:
1975*3f2dd94aSFrançois Tigeot 	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
1976*3f2dd94aSFrançois Tigeot 	mutex_unlock(&dev_priv->drm.struct_mutex);
1977*3f2dd94aSFrançois Tigeot 
1978*3f2dd94aSFrançois Tigeot err_lock:
1979a85cb24fSFrançois Tigeot 	free_oa_buffer(dev_priv);
1980a85cb24fSFrançois Tigeot 
1981a85cb24fSFrançois Tigeot err_oa_buf_alloc:
1982*3f2dd94aSFrançois Tigeot 	put_oa_config(dev_priv, stream->oa_config);
1983*3f2dd94aSFrançois Tigeot 
1984a85cb24fSFrançois Tigeot 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
1985a85cb24fSFrançois Tigeot 	intel_runtime_pm_put(dev_priv);
1986*3f2dd94aSFrançois Tigeot 
1987*3f2dd94aSFrançois Tigeot err_config:
1988a85cb24fSFrançois Tigeot 	if (stream->ctx)
1989a85cb24fSFrançois Tigeot 		oa_put_render_ctx_id(stream);
1990a85cb24fSFrançois Tigeot 
1991a85cb24fSFrançois Tigeot 	return ret;
1992a85cb24fSFrançois Tigeot }
1993*3f2dd94aSFrançois Tigeot #endif
1994a85cb24fSFrançois Tigeot 
/*
 * i915_oa_init_reg_state - hook for seeding a new context image with the
 * current per-context OA state.
 *
 * The implementation is currently compiled out, so this is a no-op. The
 * disabled code only acts on the render engine once perf is initialized and
 * an exclusive OA stream exists, and hands that stream's OA config to
 * gen8_update_reg_state_unlocked(), matching the three-argument call made
 * from gen8_configure_all_contexts().
 */
void i915_oa_init_reg_state(struct intel_engine_cs *engine,
			    struct i915_gem_context *ctx,
			    u32 *reg_state)
{
#if 0
	struct drm_i915_private *dev_priv = engine->i915;
	struct i915_perf_stream *stream;

	if (engine->id != RCS)
		return;

	if (!dev_priv->perf.initialized)
		return;

	/* Without an open OA stream there is no config to apply */
	stream = dev_priv->perf.oa.exclusive_stream;
	if (stream)
		gen8_update_reg_state_unlocked(ctx, reg_state,
					       stream->oa_config);
#endif
}
2012*3f2dd94aSFrançois Tigeot 
2013*3f2dd94aSFrançois Tigeot #if 0
2014a85cb24fSFrançois Tigeot /**
2015a85cb24fSFrançois Tigeot  * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation
2016a85cb24fSFrançois Tigeot  * @stream: An i915 perf stream
2017a85cb24fSFrançois Tigeot  * @file: An i915 perf stream file
2018a85cb24fSFrançois Tigeot  * @buf: destination buffer given by userspace
2019a85cb24fSFrançois Tigeot  * @count: the number of bytes userspace wants to read
2020a85cb24fSFrançois Tigeot  * @ppos: (inout) file seek position (unused)
2021a85cb24fSFrançois Tigeot  *
2022a85cb24fSFrançois Tigeot  * Besides wrapping &i915_perf_stream_ops->read this provides a common place to
2023a85cb24fSFrançois Tigeot  * ensure that if we've successfully copied any data then reporting that takes
2024a85cb24fSFrançois Tigeot  * precedence over any internal error status, so the data isn't lost.
2025a85cb24fSFrançois Tigeot  *
2026a85cb24fSFrançois Tigeot  * For example ret will be -ENOSPC whenever there is more buffered data than
2027a85cb24fSFrançois Tigeot  * can be copied to userspace, but that's only interesting if we weren't able
2028a85cb24fSFrançois Tigeot  * to copy some data because it implies the userspace buffer is too small to
2029a85cb24fSFrançois Tigeot  * receive a single record (and we never split records).
2030a85cb24fSFrançois Tigeot  *
2031a85cb24fSFrançois Tigeot  * Another case with ret == -EFAULT is more of a grey area since it would seem
2032a85cb24fSFrançois Tigeot  * like bad form for userspace to ask us to overrun its buffer, but the user
2033a85cb24fSFrançois Tigeot  * knows best:
2034a85cb24fSFrançois Tigeot  *
2035a85cb24fSFrançois Tigeot  *   http://yarchive.net/comp/linux/partial_reads_writes.html
2036a85cb24fSFrançois Tigeot  *
2037a85cb24fSFrançois Tigeot  * Returns: The number of bytes copied or a negative error code on failure.
2038a85cb24fSFrançois Tigeot  */
2039a85cb24fSFrançois Tigeot static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
2040a85cb24fSFrançois Tigeot 				     struct file *file,
2041a85cb24fSFrançois Tigeot 				     char __user *buf,
2042a85cb24fSFrançois Tigeot 				     size_t count,
2043a85cb24fSFrançois Tigeot 				     loff_t *ppos)
2044a85cb24fSFrançois Tigeot {
2045a85cb24fSFrançois Tigeot 	/* Note we keep the offset (aka bytes read) separate from any
2046a85cb24fSFrançois Tigeot 	 * error status so that the final check for whether we return
2047a85cb24fSFrançois Tigeot 	 * the bytes read with a higher precedence than any error (see
2048a85cb24fSFrançois Tigeot 	 * comment below) doesn't need to be handled/duplicated in
2049a85cb24fSFrançois Tigeot 	 * stream->ops->read() implementations.
2050a85cb24fSFrançois Tigeot 	 */
2051a85cb24fSFrançois Tigeot 	size_t offset = 0;
2052a85cb24fSFrançois Tigeot 	int ret = stream->ops->read(stream, buf, count, &offset);
2053a85cb24fSFrançois Tigeot 
2054a85cb24fSFrançois Tigeot 	return offset ?: (ret ?: -EAGAIN);
2055a85cb24fSFrançois Tigeot }
2056a85cb24fSFrançois Tigeot 
2057a85cb24fSFrançois Tigeot /**
2058a85cb24fSFrançois Tigeot  * i915_perf_read - handles read() FOP for i915 perf stream FDs
2059a85cb24fSFrançois Tigeot  * @file: An i915 perf stream file
2060a85cb24fSFrançois Tigeot  * @buf: destination buffer given by userspace
2061a85cb24fSFrançois Tigeot  * @count: the number of bytes userspace wants to read
2062a85cb24fSFrançois Tigeot  * @ppos: (inout) file seek position (unused)
2063a85cb24fSFrançois Tigeot  *
2064a85cb24fSFrançois Tigeot  * The entry point for handling a read() on a stream file descriptor from
2065a85cb24fSFrançois Tigeot  * userspace. Most of the work is left to the i915_perf_read_locked() and
2066a85cb24fSFrançois Tigeot  * &i915_perf_stream_ops->read but to save having stream implementations (of
2067a85cb24fSFrançois Tigeot  * which we might have multiple later) we handle blocking read here.
2068a85cb24fSFrançois Tigeot  *
2069a85cb24fSFrançois Tigeot  * We can also consistently treat trying to read from a disabled stream
2070a85cb24fSFrançois Tigeot  * as an IO error so implementations can assume the stream is enabled
2071a85cb24fSFrançois Tigeot  * while reading.
2072a85cb24fSFrançois Tigeot  *
2073a85cb24fSFrançois Tigeot  * Returns: The number of bytes copied or a negative error code on failure.
2074a85cb24fSFrançois Tigeot  */
2075a85cb24fSFrançois Tigeot static ssize_t i915_perf_read(struct file *file,
2076a85cb24fSFrançois Tigeot 			      char __user *buf,
2077a85cb24fSFrançois Tigeot 			      size_t count,
2078a85cb24fSFrançois Tigeot 			      loff_t *ppos)
2079a85cb24fSFrançois Tigeot {
2080a85cb24fSFrançois Tigeot 	struct i915_perf_stream *stream = file->private_data;
2081a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
2082a85cb24fSFrançois Tigeot 	ssize_t ret;
2083a85cb24fSFrançois Tigeot 
2084a85cb24fSFrançois Tigeot 	/* To ensure it's handled consistently we simply treat all reads of a
2085a85cb24fSFrançois Tigeot 	 * disabled stream as an error. In particular it might otherwise lead
2086a85cb24fSFrançois Tigeot 	 * to a deadlock for blocking file descriptors...
2087a85cb24fSFrançois Tigeot 	 */
2088a85cb24fSFrançois Tigeot 	if (!stream->enabled)
2089a85cb24fSFrançois Tigeot 		return -EIO;
2090a85cb24fSFrançois Tigeot 
2091a85cb24fSFrançois Tigeot 	if (!(file->f_flags & O_NONBLOCK)) {
2092a85cb24fSFrançois Tigeot 		/* There's the small chance of false positives from
2093a85cb24fSFrançois Tigeot 		 * stream->ops->wait_unlocked.
2094a85cb24fSFrançois Tigeot 		 *
2095a85cb24fSFrançois Tigeot 		 * E.g. with single context filtering since we only wait until
2096a85cb24fSFrançois Tigeot 		 * oabuffer has >= 1 report we don't immediately know whether
2097a85cb24fSFrançois Tigeot 		 * any reports really belong to the current context
2098a85cb24fSFrançois Tigeot 		 */
2099a85cb24fSFrançois Tigeot 		do {
2100a85cb24fSFrançois Tigeot 			ret = stream->ops->wait_unlocked(stream);
2101a85cb24fSFrançois Tigeot 			if (ret)
2102a85cb24fSFrançois Tigeot 				return ret;
2103a85cb24fSFrançois Tigeot 
2104a85cb24fSFrançois Tigeot 			mutex_lock(&dev_priv->perf.lock);
2105a85cb24fSFrançois Tigeot 			ret = i915_perf_read_locked(stream, file,
2106a85cb24fSFrançois Tigeot 						    buf, count, ppos);
2107a85cb24fSFrançois Tigeot 			mutex_unlock(&dev_priv->perf.lock);
2108a85cb24fSFrançois Tigeot 		} while (ret == -EAGAIN);
2109a85cb24fSFrançois Tigeot 	} else {
2110a85cb24fSFrançois Tigeot 		mutex_lock(&dev_priv->perf.lock);
2111a85cb24fSFrançois Tigeot 		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
2112a85cb24fSFrançois Tigeot 		mutex_unlock(&dev_priv->perf.lock);
2113a85cb24fSFrançois Tigeot 	}
2114a85cb24fSFrançois Tigeot 
2115a85cb24fSFrançois Tigeot 	if (ret >= 0) {
2116a85cb24fSFrançois Tigeot 		/* Maybe make ->pollin per-stream state if we support multiple
2117a85cb24fSFrançois Tigeot 		 * concurrent streams in the future.
2118a85cb24fSFrançois Tigeot 		 */
2119a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.pollin = false;
2120a85cb24fSFrançois Tigeot 	}
2121a85cb24fSFrançois Tigeot 
2122a85cb24fSFrançois Tigeot 	return ret;
2123a85cb24fSFrançois Tigeot }
2124a85cb24fSFrançois Tigeot 
2125a85cb24fSFrançois Tigeot static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
2126a85cb24fSFrançois Tigeot {
2127a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv =
2128a85cb24fSFrançois Tigeot 		container_of(hrtimer, typeof(*dev_priv),
2129a85cb24fSFrançois Tigeot 			     perf.oa.poll_check_timer);
2130a85cb24fSFrançois Tigeot 
2131a85cb24fSFrançois Tigeot 	if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) {
2132a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.pollin = true;
2133a85cb24fSFrançois Tigeot 		wake_up(&dev_priv->perf.oa.poll_wq);
2134a85cb24fSFrançois Tigeot 	}
2135a85cb24fSFrançois Tigeot 
2136a85cb24fSFrançois Tigeot 	hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));
2137a85cb24fSFrançois Tigeot 
2138a85cb24fSFrançois Tigeot 	return HRTIMER_RESTART;
2139a85cb24fSFrançois Tigeot }
2140a85cb24fSFrançois Tigeot 
2141a85cb24fSFrançois Tigeot /**
2142a85cb24fSFrançois Tigeot  * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream
2143a85cb24fSFrançois Tigeot  * @dev_priv: i915 device instance
2144a85cb24fSFrançois Tigeot  * @stream: An i915 perf stream
2145a85cb24fSFrançois Tigeot  * @file: An i915 perf stream file
2146a85cb24fSFrançois Tigeot  * @wait: poll() state table
2147a85cb24fSFrançois Tigeot  *
2148a85cb24fSFrançois Tigeot  * For handling userspace polling on an i915 perf stream, this calls through to
2149a85cb24fSFrançois Tigeot  * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
2150a85cb24fSFrançois Tigeot  * will be woken for new stream data.
2151a85cb24fSFrançois Tigeot  *
2152a85cb24fSFrançois Tigeot  * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
2153a85cb24fSFrançois Tigeot  * with any non-file-operation driver hooks.
2154a85cb24fSFrançois Tigeot  *
2155a85cb24fSFrançois Tigeot  * Returns: any poll events that are ready without sleeping
2156a85cb24fSFrançois Tigeot  */
2157a85cb24fSFrançois Tigeot static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv,
2158a85cb24fSFrançois Tigeot 					  struct i915_perf_stream *stream,
2159a85cb24fSFrançois Tigeot 					  struct file *file,
2160a85cb24fSFrançois Tigeot 					  poll_table *wait)
2161a85cb24fSFrançois Tigeot {
2162a85cb24fSFrançois Tigeot 	unsigned int events = 0;
2163a85cb24fSFrançois Tigeot 
2164a85cb24fSFrançois Tigeot 	stream->ops->poll_wait(stream, file, wait);
2165a85cb24fSFrançois Tigeot 
2166a85cb24fSFrançois Tigeot 	/* Note: we don't explicitly check whether there's something to read
2167a85cb24fSFrançois Tigeot 	 * here since this path may be very hot depending on what else
2168a85cb24fSFrançois Tigeot 	 * userspace is polling, or on the timeout in use. We rely solely on
2169a85cb24fSFrançois Tigeot 	 * the hrtimer/oa_poll_check_timer_cb to notify us when there are
2170a85cb24fSFrançois Tigeot 	 * samples to read.
2171a85cb24fSFrançois Tigeot 	 */
2172a85cb24fSFrançois Tigeot 	if (dev_priv->perf.oa.pollin)
2173a85cb24fSFrançois Tigeot 		events |= POLLIN;
2174a85cb24fSFrançois Tigeot 
2175a85cb24fSFrançois Tigeot 	return events;
2176a85cb24fSFrançois Tigeot }
2177a85cb24fSFrançois Tigeot 
2178a85cb24fSFrançois Tigeot /**
2179a85cb24fSFrançois Tigeot  * i915_perf_poll - call poll_wait() with a suitable wait queue for stream
2180a85cb24fSFrançois Tigeot  * @file: An i915 perf stream file
2181a85cb24fSFrançois Tigeot  * @wait: poll() state table
2182a85cb24fSFrançois Tigeot  *
2183a85cb24fSFrançois Tigeot  * For handling userspace polling on an i915 perf stream, this ensures
2184a85cb24fSFrançois Tigeot  * poll_wait() gets called with a wait queue that will be woken for new stream
2185a85cb24fSFrançois Tigeot  * data.
2186a85cb24fSFrançois Tigeot  *
2187a85cb24fSFrançois Tigeot  * Note: Implementation deferred to i915_perf_poll_locked()
2188a85cb24fSFrançois Tigeot  *
2189a85cb24fSFrançois Tigeot  * Returns: any poll events that are ready without sleeping
2190a85cb24fSFrançois Tigeot  */
2191a85cb24fSFrançois Tigeot static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
2192a85cb24fSFrançois Tigeot {
2193a85cb24fSFrançois Tigeot 	struct i915_perf_stream *stream = file->private_data;
2194a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
2195a85cb24fSFrançois Tigeot 	int ret;
2196a85cb24fSFrançois Tigeot 
2197a85cb24fSFrançois Tigeot 	mutex_lock(&dev_priv->perf.lock);
2198a85cb24fSFrançois Tigeot 	ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
2199a85cb24fSFrançois Tigeot 	mutex_unlock(&dev_priv->perf.lock);
2200a85cb24fSFrançois Tigeot 
2201a85cb24fSFrançois Tigeot 	return ret;
2202a85cb24fSFrançois Tigeot }
2203a85cb24fSFrançois Tigeot 
2204a85cb24fSFrançois Tigeot /**
2205a85cb24fSFrançois Tigeot  * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl
2206a85cb24fSFrançois Tigeot  * @stream: A disabled i915 perf stream
2207a85cb24fSFrançois Tigeot  *
2208a85cb24fSFrançois Tigeot  * [Re]enables the associated capture of data for this stream.
2209a85cb24fSFrançois Tigeot  *
2210a85cb24fSFrançois Tigeot  * If a stream was previously enabled then there's currently no intention
2211a85cb24fSFrançois Tigeot  * to provide userspace any guarantee about the preservation of previously
2212a85cb24fSFrançois Tigeot  * buffered data.
2213a85cb24fSFrançois Tigeot  */
2214a85cb24fSFrançois Tigeot static void i915_perf_enable_locked(struct i915_perf_stream *stream)
2215a85cb24fSFrançois Tigeot {
2216a85cb24fSFrançois Tigeot 	if (stream->enabled)
2217a85cb24fSFrançois Tigeot 		return;
2218a85cb24fSFrançois Tigeot 
2219a85cb24fSFrançois Tigeot 	/* Allow stream->ops->enable() to refer to this */
2220a85cb24fSFrançois Tigeot 	stream->enabled = true;
2221a85cb24fSFrançois Tigeot 
2222a85cb24fSFrançois Tigeot 	if (stream->ops->enable)
2223a85cb24fSFrançois Tigeot 		stream->ops->enable(stream);
2224a85cb24fSFrançois Tigeot }
2225a85cb24fSFrançois Tigeot 
2226a85cb24fSFrançois Tigeot /**
2227a85cb24fSFrançois Tigeot  * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl
2228a85cb24fSFrançois Tigeot  * @stream: An enabled i915 perf stream
2229a85cb24fSFrançois Tigeot  *
2230a85cb24fSFrançois Tigeot  * Disables the associated capture of data for this stream.
2231a85cb24fSFrançois Tigeot  *
2232a85cb24fSFrançois Tigeot  * The intention is that disabling an re-enabling a stream will ideally be
2233a85cb24fSFrançois Tigeot  * cheaper than destroying and re-opening a stream with the same configuration,
2234a85cb24fSFrançois Tigeot  * though there are no formal guarantees about what state or buffered data
2235a85cb24fSFrançois Tigeot  * must be retained between disabling and re-enabling a stream.
2236a85cb24fSFrançois Tigeot  *
2237a85cb24fSFrançois Tigeot  * Note: while a stream is disabled it's considered an error for userspace
2238a85cb24fSFrançois Tigeot  * to attempt to read from the stream (-EIO).
2239a85cb24fSFrançois Tigeot  */
2240a85cb24fSFrançois Tigeot static void i915_perf_disable_locked(struct i915_perf_stream *stream)
2241a85cb24fSFrançois Tigeot {
2242a85cb24fSFrançois Tigeot 	if (!stream->enabled)
2243a85cb24fSFrançois Tigeot 		return;
2244a85cb24fSFrançois Tigeot 
2245a85cb24fSFrançois Tigeot 	/* Allow stream->ops->disable() to refer to this */
2246a85cb24fSFrançois Tigeot 	stream->enabled = false;
2247a85cb24fSFrançois Tigeot 
2248a85cb24fSFrançois Tigeot 	if (stream->ops->disable)
2249a85cb24fSFrançois Tigeot 		stream->ops->disable(stream);
2250a85cb24fSFrançois Tigeot }
2251a85cb24fSFrançois Tigeot 
2252a85cb24fSFrançois Tigeot /**
2253a85cb24fSFrançois Tigeot  * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
2254a85cb24fSFrançois Tigeot  * @stream: An i915 perf stream
2255a85cb24fSFrançois Tigeot  * @cmd: the ioctl request
2256a85cb24fSFrançois Tigeot  * @arg: the ioctl data
2257a85cb24fSFrançois Tigeot  *
2258a85cb24fSFrançois Tigeot  * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
2259a85cb24fSFrançois Tigeot  * with any non-file-operation driver hooks.
2260a85cb24fSFrançois Tigeot  *
2261a85cb24fSFrançois Tigeot  * Returns: zero on success or a negative error code. Returns -EINVAL for
2262a85cb24fSFrançois Tigeot  * an unknown ioctl request.
2263a85cb24fSFrançois Tigeot  */
2264a85cb24fSFrançois Tigeot static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
2265a85cb24fSFrançois Tigeot 				   unsigned int cmd,
2266a85cb24fSFrançois Tigeot 				   unsigned long arg)
2267a85cb24fSFrançois Tigeot {
2268a85cb24fSFrançois Tigeot 	switch (cmd) {
2269a85cb24fSFrançois Tigeot 	case I915_PERF_IOCTL_ENABLE:
2270a85cb24fSFrançois Tigeot 		i915_perf_enable_locked(stream);
2271a85cb24fSFrançois Tigeot 		return 0;
2272a85cb24fSFrançois Tigeot 	case I915_PERF_IOCTL_DISABLE:
2273a85cb24fSFrançois Tigeot 		i915_perf_disable_locked(stream);
2274a85cb24fSFrançois Tigeot 		return 0;
2275a85cb24fSFrançois Tigeot 	}
2276a85cb24fSFrançois Tigeot 
2277a85cb24fSFrançois Tigeot 	return -EINVAL;
2278a85cb24fSFrançois Tigeot }
2279a85cb24fSFrançois Tigeot 
2280a85cb24fSFrançois Tigeot /**
2281a85cb24fSFrançois Tigeot  * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
2282a85cb24fSFrançois Tigeot  * @file: An i915 perf stream file
2283a85cb24fSFrançois Tigeot  * @cmd: the ioctl request
2284a85cb24fSFrançois Tigeot  * @arg: the ioctl data
2285a85cb24fSFrançois Tigeot  *
2286a85cb24fSFrançois Tigeot  * Implementation deferred to i915_perf_ioctl_locked().
2287a85cb24fSFrançois Tigeot  *
2288a85cb24fSFrançois Tigeot  * Returns: zero on success or a negative error code. Returns -EINVAL for
2289a85cb24fSFrançois Tigeot  * an unknown ioctl request.
2290a85cb24fSFrançois Tigeot  */
2291a85cb24fSFrançois Tigeot static long i915_perf_ioctl(struct file *file,
2292a85cb24fSFrançois Tigeot 			    unsigned int cmd,
2293a85cb24fSFrançois Tigeot 			    unsigned long arg)
2294a85cb24fSFrançois Tigeot {
2295a85cb24fSFrançois Tigeot 	struct i915_perf_stream *stream = file->private_data;
2296a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
2297a85cb24fSFrançois Tigeot 	long ret;
2298a85cb24fSFrançois Tigeot 
2299a85cb24fSFrançois Tigeot 	mutex_lock(&dev_priv->perf.lock);
2300a85cb24fSFrançois Tigeot 	ret = i915_perf_ioctl_locked(stream, cmd, arg);
2301a85cb24fSFrançois Tigeot 	mutex_unlock(&dev_priv->perf.lock);
2302a85cb24fSFrançois Tigeot 
2303a85cb24fSFrançois Tigeot 	return ret;
2304a85cb24fSFrançois Tigeot }
2305a85cb24fSFrançois Tigeot 
2306a85cb24fSFrançois Tigeot /**
2307a85cb24fSFrançois Tigeot  * i915_perf_destroy_locked - destroy an i915 perf stream
2308a85cb24fSFrançois Tigeot  * @stream: An i915 perf stream
2309a85cb24fSFrançois Tigeot  *
2310a85cb24fSFrançois Tigeot  * Frees all resources associated with the given i915 perf @stream, disabling
2311a85cb24fSFrançois Tigeot  * any associated data capture in the process.
2312a85cb24fSFrançois Tigeot  *
2313a85cb24fSFrançois Tigeot  * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
2314a85cb24fSFrançois Tigeot  * with any non-file-operation driver hooks.
2315a85cb24fSFrançois Tigeot  */
2316a85cb24fSFrançois Tigeot static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
2317a85cb24fSFrançois Tigeot {
2318a85cb24fSFrançois Tigeot 	if (stream->enabled)
2319a85cb24fSFrançois Tigeot 		i915_perf_disable_locked(stream);
2320a85cb24fSFrançois Tigeot 
2321a85cb24fSFrançois Tigeot 	if (stream->ops->destroy)
2322a85cb24fSFrançois Tigeot 		stream->ops->destroy(stream);
2323a85cb24fSFrançois Tigeot 
2324a85cb24fSFrançois Tigeot 	list_del(&stream->link);
2325a85cb24fSFrançois Tigeot 
2326a85cb24fSFrançois Tigeot 	if (stream->ctx)
2327*3f2dd94aSFrançois Tigeot 		i915_gem_context_put(stream->ctx);
2328a85cb24fSFrançois Tigeot 
2329a85cb24fSFrançois Tigeot 	kfree(stream);
2330a85cb24fSFrançois Tigeot }
2331a85cb24fSFrançois Tigeot 
2332a85cb24fSFrançois Tigeot /**
2333a85cb24fSFrançois Tigeot  * i915_perf_release - handles userspace close() of a stream file
2334a85cb24fSFrançois Tigeot  * @inode: anonymous inode associated with file
2335a85cb24fSFrançois Tigeot  * @file: An i915 perf stream file
2336a85cb24fSFrançois Tigeot  *
2337a85cb24fSFrançois Tigeot  * Cleans up any resources associated with an open i915 perf stream file.
2338a85cb24fSFrançois Tigeot  *
2339a85cb24fSFrançois Tigeot  * NB: close() can't really fail from the userspace point of view.
2340a85cb24fSFrançois Tigeot  *
2341a85cb24fSFrançois Tigeot  * Returns: zero on success or a negative error code.
2342a85cb24fSFrançois Tigeot  */
2343a85cb24fSFrançois Tigeot static int i915_perf_release(struct inode *inode, struct file *file)
2344a85cb24fSFrançois Tigeot {
2345a85cb24fSFrançois Tigeot 	struct i915_perf_stream *stream = file->private_data;
2346a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = stream->dev_priv;
2347a85cb24fSFrançois Tigeot 
2348a85cb24fSFrançois Tigeot 	mutex_lock(&dev_priv->perf.lock);
2349a85cb24fSFrançois Tigeot 	i915_perf_destroy_locked(stream);
2350a85cb24fSFrançois Tigeot 	mutex_unlock(&dev_priv->perf.lock);
2351a85cb24fSFrançois Tigeot 
2352a85cb24fSFrançois Tigeot 	return 0;
2353a85cb24fSFrançois Tigeot }
2354a85cb24fSFrançois Tigeot 
2355a85cb24fSFrançois Tigeot 
2356a85cb24fSFrançois Tigeot static const struct file_operations fops = {
2357a85cb24fSFrançois Tigeot 	.owner		= THIS_MODULE,
2358a85cb24fSFrançois Tigeot 	.llseek		= no_llseek,
2359a85cb24fSFrançois Tigeot 	.release	= i915_perf_release,
2360a85cb24fSFrançois Tigeot 	.poll		= i915_perf_poll,
2361a85cb24fSFrançois Tigeot 	.read		= i915_perf_read,
2362a85cb24fSFrançois Tigeot 	.unlocked_ioctl	= i915_perf_ioctl,
2363*3f2dd94aSFrançois Tigeot 	/* Our ioctl have no arguments, so it's safe to use the same function
2364*3f2dd94aSFrançois Tigeot 	 * to handle 32bits compatibility.
2365*3f2dd94aSFrançois Tigeot 	 */
2366*3f2dd94aSFrançois Tigeot 	.compat_ioctl   = i915_perf_ioctl,
2367a85cb24fSFrançois Tigeot };
2368a85cb24fSFrançois Tigeot 
2369a85cb24fSFrançois Tigeot 
2370a85cb24fSFrançois Tigeot /**
2371a85cb24fSFrançois Tigeot  * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
2372a85cb24fSFrançois Tigeot  * @dev_priv: i915 device instance
2373a85cb24fSFrançois Tigeot  * @param: The open parameters passed to 'DRM_I915_PERF_OPEN`
2374a85cb24fSFrançois Tigeot  * @props: individually validated u64 property value pairs
2375a85cb24fSFrançois Tigeot  * @file: drm file
2376a85cb24fSFrançois Tigeot  *
2377a85cb24fSFrançois Tigeot  * See i915_perf_ioctl_open() for interface details.
2378a85cb24fSFrançois Tigeot  *
2379a85cb24fSFrançois Tigeot  * Implements further stream config validation and stream initialization on
2380a85cb24fSFrançois Tigeot  * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex
2381a85cb24fSFrançois Tigeot  * taken to serialize with any non-file-operation driver hooks.
2382a85cb24fSFrançois Tigeot  *
2383a85cb24fSFrançois Tigeot  * Note: at this point the @props have only been validated in isolation and
2384a85cb24fSFrançois Tigeot  * it's still necessary to validate that the combination of properties makes
2385a85cb24fSFrançois Tigeot  * sense.
2386a85cb24fSFrançois Tigeot  *
2387a85cb24fSFrançois Tigeot  * In the case where userspace is interested in OA unit metrics then further
2388a85cb24fSFrançois Tigeot  * config validation and stream initialization details will be handled by
2389a85cb24fSFrançois Tigeot  * i915_oa_stream_init(). The code here should only validate config state that
2390a85cb24fSFrançois Tigeot  * will be relevant to all stream types / backends.
2391a85cb24fSFrançois Tigeot  *
2392a85cb24fSFrançois Tigeot  * Returns: zero on success or a negative error code.
2393a85cb24fSFrançois Tigeot  */
2394a85cb24fSFrançois Tigeot static int
2395a85cb24fSFrançois Tigeot i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
2396a85cb24fSFrançois Tigeot 			    struct drm_i915_perf_open_param *param,
2397a85cb24fSFrançois Tigeot 			    struct perf_open_properties *props,
2398a85cb24fSFrançois Tigeot 			    struct drm_file *file)
2399a85cb24fSFrançois Tigeot {
2400a85cb24fSFrançois Tigeot 	struct i915_gem_context *specific_ctx = NULL;
2401a85cb24fSFrançois Tigeot 	struct i915_perf_stream *stream = NULL;
2402a85cb24fSFrançois Tigeot 	unsigned long f_flags = 0;
2403*3f2dd94aSFrançois Tigeot 	bool privileged_op = true;
2404a85cb24fSFrançois Tigeot 	int stream_fd;
2405a85cb24fSFrançois Tigeot 	int ret;
2406a85cb24fSFrançois Tigeot 
2407a85cb24fSFrançois Tigeot 	if (props->single_context) {
2408a85cb24fSFrançois Tigeot 		u32 ctx_handle = props->ctx_handle;
2409a85cb24fSFrançois Tigeot 		struct drm_i915_file_private *file_priv = file->driver_priv;
2410a85cb24fSFrançois Tigeot 
2411*3f2dd94aSFrançois Tigeot 		specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle);
2412*3f2dd94aSFrançois Tigeot 		if (!specific_ctx) {
2413a85cb24fSFrançois Tigeot 			DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n",
2414a85cb24fSFrançois Tigeot 				  ctx_handle);
2415*3f2dd94aSFrançois Tigeot 			ret = -ENOENT;
2416a85cb24fSFrançois Tigeot 			goto err;
2417a85cb24fSFrançois Tigeot 		}
2418a85cb24fSFrançois Tigeot 	}
2419a85cb24fSFrançois Tigeot 
2420*3f2dd94aSFrançois Tigeot 	/*
2421*3f2dd94aSFrançois Tigeot 	 * On Haswell the OA unit supports clock gating off for a specific
2422*3f2dd94aSFrançois Tigeot 	 * context and in this mode there's no visibility of metrics for the
2423*3f2dd94aSFrançois Tigeot 	 * rest of the system, which we consider acceptable for a
2424*3f2dd94aSFrançois Tigeot 	 * non-privileged client.
2425*3f2dd94aSFrançois Tigeot 	 *
2426*3f2dd94aSFrançois Tigeot 	 * For Gen8+ the OA unit no longer supports clock gating off for a
2427*3f2dd94aSFrançois Tigeot 	 * specific context and the kernel can't securely stop the counters
2428*3f2dd94aSFrançois Tigeot 	 * from updating as system-wide / global values. Even though we can
2429*3f2dd94aSFrançois Tigeot 	 * filter reports based on the included context ID we can't block
2430*3f2dd94aSFrançois Tigeot 	 * clients from seeing the raw / global counter values via
2431*3f2dd94aSFrançois Tigeot 	 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to
2432*3f2dd94aSFrançois Tigeot 	 * enable the OA unit by default.
2433*3f2dd94aSFrançois Tigeot 	 */
2434*3f2dd94aSFrançois Tigeot 	if (IS_HASWELL(dev_priv) && specific_ctx)
2435*3f2dd94aSFrançois Tigeot 		privileged_op = false;
2436*3f2dd94aSFrançois Tigeot 
2437a85cb24fSFrançois Tigeot 	/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
2438a85cb24fSFrançois Tigeot 	 * we check a dev.i915.perf_stream_paranoid sysctl option
2439a85cb24fSFrançois Tigeot 	 * to determine if it's ok to access system wide OA counters
2440a85cb24fSFrançois Tigeot 	 * without CAP_SYS_ADMIN privileges.
2441a85cb24fSFrançois Tigeot 	 */
2442*3f2dd94aSFrançois Tigeot 	if (privileged_op &&
2443a85cb24fSFrançois Tigeot 	    i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
2444a85cb24fSFrançois Tigeot 		DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n");
2445a85cb24fSFrançois Tigeot 		ret = -EACCES;
2446a85cb24fSFrançois Tigeot 		goto err_ctx;
2447a85cb24fSFrançois Tigeot 	}
2448a85cb24fSFrançois Tigeot 
2449a85cb24fSFrançois Tigeot 	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
2450a85cb24fSFrançois Tigeot 	if (!stream) {
2451a85cb24fSFrançois Tigeot 		ret = -ENOMEM;
2452a85cb24fSFrançois Tigeot 		goto err_ctx;
2453a85cb24fSFrançois Tigeot 	}
2454a85cb24fSFrançois Tigeot 
2455a85cb24fSFrançois Tigeot 	stream->dev_priv = dev_priv;
2456a85cb24fSFrançois Tigeot 	stream->ctx = specific_ctx;
2457a85cb24fSFrançois Tigeot 
2458a85cb24fSFrançois Tigeot 	ret = i915_oa_stream_init(stream, param, props);
2459a85cb24fSFrançois Tigeot 	if (ret)
2460a85cb24fSFrançois Tigeot 		goto err_alloc;
2461a85cb24fSFrançois Tigeot 
2462a85cb24fSFrançois Tigeot 	/* we avoid simply assigning stream->sample_flags = props->sample_flags
2463a85cb24fSFrançois Tigeot 	 * to have _stream_init check the combination of sample flags more
2464a85cb24fSFrançois Tigeot 	 * thoroughly, but still this is the expected result at this point.
2465a85cb24fSFrançois Tigeot 	 */
2466a85cb24fSFrançois Tigeot 	if (WARN_ON(stream->sample_flags != props->sample_flags)) {
2467a85cb24fSFrançois Tigeot 		ret = -ENODEV;
2468a85cb24fSFrançois Tigeot 		goto err_flags;
2469a85cb24fSFrançois Tigeot 	}
2470a85cb24fSFrançois Tigeot 
2471a85cb24fSFrançois Tigeot 	list_add(&stream->link, &dev_priv->perf.streams);
2472a85cb24fSFrançois Tigeot 
2473a85cb24fSFrançois Tigeot 	if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
2474a85cb24fSFrançois Tigeot 		f_flags |= O_CLOEXEC;
2475a85cb24fSFrançois Tigeot 	if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
2476a85cb24fSFrançois Tigeot 		f_flags |= O_NONBLOCK;
2477a85cb24fSFrançois Tigeot 
2478a85cb24fSFrançois Tigeot 	stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
2479a85cb24fSFrançois Tigeot 	if (stream_fd < 0) {
2480a85cb24fSFrançois Tigeot 		ret = stream_fd;
2481a85cb24fSFrançois Tigeot 		goto err_open;
2482a85cb24fSFrançois Tigeot 	}
2483a85cb24fSFrançois Tigeot 
2484a85cb24fSFrançois Tigeot 	if (!(param->flags & I915_PERF_FLAG_DISABLED))
2485a85cb24fSFrançois Tigeot 		i915_perf_enable_locked(stream);
2486a85cb24fSFrançois Tigeot 
2487a85cb24fSFrançois Tigeot 	return stream_fd;
2488a85cb24fSFrançois Tigeot 
2489a85cb24fSFrançois Tigeot err_open:
2490a85cb24fSFrançois Tigeot 	list_del(&stream->link);
2491a85cb24fSFrançois Tigeot err_flags:
2492a85cb24fSFrançois Tigeot 	if (stream->ops->destroy)
2493a85cb24fSFrançois Tigeot 		stream->ops->destroy(stream);
2494a85cb24fSFrançois Tigeot err_alloc:
2495a85cb24fSFrançois Tigeot 	kfree(stream);
2496a85cb24fSFrançois Tigeot err_ctx:
2497a85cb24fSFrançois Tigeot 	if (specific_ctx)
2498*3f2dd94aSFrançois Tigeot 		i915_gem_context_put(specific_ctx);
2499a85cb24fSFrançois Tigeot err:
2500a85cb24fSFrançois Tigeot 	return ret;
2501a85cb24fSFrançois Tigeot }
2502a85cb24fSFrançois Tigeot 
2503*3f2dd94aSFrançois Tigeot static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
2504*3f2dd94aSFrançois Tigeot {
2505*3f2dd94aSFrançois Tigeot 	return div_u64(1000000000ULL * (2ULL << exponent),
2506*3f2dd94aSFrançois Tigeot 		       dev_priv->perf.oa.timestamp_frequency);
2507*3f2dd94aSFrançois Tigeot }
2508*3f2dd94aSFrançois Tigeot 
2509a85cb24fSFrançois Tigeot /**
2510a85cb24fSFrançois Tigeot  * read_properties_unlocked - validate + copy userspace stream open properties
2511a85cb24fSFrançois Tigeot  * @dev_priv: i915 device instance
2512a85cb24fSFrançois Tigeot  * @uprops: The array of u64 key value pairs given by userspace
2513a85cb24fSFrançois Tigeot  * @n_props: The number of key value pairs expected in @uprops
2514a85cb24fSFrançois Tigeot  * @props: The stream configuration built up while validating properties
2515a85cb24fSFrançois Tigeot  *
2516a85cb24fSFrançois Tigeot  * Note this function only validates properties in isolation it doesn't
2517a85cb24fSFrançois Tigeot  * validate that the combination of properties makes sense or that all
2518a85cb24fSFrançois Tigeot  * properties necessary for a particular kind of stream have been set.
2519a85cb24fSFrançois Tigeot  *
2520a85cb24fSFrançois Tigeot  * Note that there currently aren't any ordering requirements for properties so
2521a85cb24fSFrançois Tigeot  * we shouldn't validate or assume anything about ordering here. This doesn't
2522a85cb24fSFrançois Tigeot  * rule out defining new properties with ordering requirements in the future.
2523a85cb24fSFrançois Tigeot  */
2524a85cb24fSFrançois Tigeot static int read_properties_unlocked(struct drm_i915_private *dev_priv,
2525a85cb24fSFrançois Tigeot 				    u64 __user *uprops,
2526a85cb24fSFrançois Tigeot 				    u32 n_props,
2527a85cb24fSFrançois Tigeot 				    struct perf_open_properties *props)
2528a85cb24fSFrançois Tigeot {
2529a85cb24fSFrançois Tigeot 	u64 __user *uprop = uprops;
2530*3f2dd94aSFrançois Tigeot 	u32 i;
2531a85cb24fSFrançois Tigeot 
2532a85cb24fSFrançois Tigeot 	memset(props, 0, sizeof(struct perf_open_properties));
2533a85cb24fSFrançois Tigeot 
2534a85cb24fSFrançois Tigeot 	if (!n_props) {
2535a85cb24fSFrançois Tigeot 		DRM_DEBUG("No i915 perf properties given\n");
2536a85cb24fSFrançois Tigeot 		return -EINVAL;
2537a85cb24fSFrançois Tigeot 	}
2538a85cb24fSFrançois Tigeot 
2539a85cb24fSFrançois Tigeot 	/* Considering that ID = 0 is reserved and assuming that we don't
2540a85cb24fSFrançois Tigeot 	 * (currently) expect any configurations to ever specify duplicate
2541a85cb24fSFrançois Tigeot 	 * values for a particular property ID then the last _PROP_MAX value is
2542a85cb24fSFrançois Tigeot 	 * one greater than the maximum number of properties we expect to get
2543a85cb24fSFrançois Tigeot 	 * from userspace.
2544a85cb24fSFrançois Tigeot 	 */
2545a85cb24fSFrançois Tigeot 	if (n_props >= DRM_I915_PERF_PROP_MAX) {
2546a85cb24fSFrançois Tigeot 		DRM_DEBUG("More i915 perf properties specified than exist\n");
2547a85cb24fSFrançois Tigeot 		return -EINVAL;
2548a85cb24fSFrançois Tigeot 	}
2549a85cb24fSFrançois Tigeot 
2550a85cb24fSFrançois Tigeot 	for (i = 0; i < n_props; i++) {
2551a85cb24fSFrançois Tigeot 		u64 oa_period, oa_freq_hz;
2552a85cb24fSFrançois Tigeot 		u64 id, value;
2553a85cb24fSFrançois Tigeot 		int ret;
2554a85cb24fSFrançois Tigeot 
2555a85cb24fSFrançois Tigeot 		ret = get_user(id, uprop);
2556a85cb24fSFrançois Tigeot 		if (ret)
2557a85cb24fSFrançois Tigeot 			return ret;
2558a85cb24fSFrançois Tigeot 
2559a85cb24fSFrançois Tigeot 		ret = get_user(value, uprop + 1);
2560a85cb24fSFrançois Tigeot 		if (ret)
2561a85cb24fSFrançois Tigeot 			return ret;
2562a85cb24fSFrançois Tigeot 
2563a85cb24fSFrançois Tigeot 		if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
2564a85cb24fSFrançois Tigeot 			DRM_DEBUG("Unknown i915 perf property ID\n");
2565a85cb24fSFrançois Tigeot 			return -EINVAL;
2566a85cb24fSFrançois Tigeot 		}
2567a85cb24fSFrançois Tigeot 
2568a85cb24fSFrançois Tigeot 		switch ((enum drm_i915_perf_property_id)id) {
2569a85cb24fSFrançois Tigeot 		case DRM_I915_PERF_PROP_CTX_HANDLE:
2570a85cb24fSFrançois Tigeot 			props->single_context = 1;
2571a85cb24fSFrançois Tigeot 			props->ctx_handle = value;
2572a85cb24fSFrançois Tigeot 			break;
2573a85cb24fSFrançois Tigeot 		case DRM_I915_PERF_PROP_SAMPLE_OA:
2574a85cb24fSFrançois Tigeot 			props->sample_flags |= SAMPLE_OA_REPORT;
2575a85cb24fSFrançois Tigeot 			break;
2576a85cb24fSFrançois Tigeot 		case DRM_I915_PERF_PROP_OA_METRICS_SET:
2577*3f2dd94aSFrançois Tigeot 			if (value == 0) {
2578a85cb24fSFrançois Tigeot 				DRM_DEBUG("Unknown OA metric set ID\n");
2579a85cb24fSFrançois Tigeot 				return -EINVAL;
2580a85cb24fSFrançois Tigeot 			}
2581a85cb24fSFrançois Tigeot 			props->metrics_set = value;
2582a85cb24fSFrançois Tigeot 			break;
2583a85cb24fSFrançois Tigeot 		case DRM_I915_PERF_PROP_OA_FORMAT:
2584a85cb24fSFrançois Tigeot 			if (value == 0 || value >= I915_OA_FORMAT_MAX) {
2585a85cb24fSFrançois Tigeot 				DRM_DEBUG("Invalid OA report format\n");
2586a85cb24fSFrançois Tigeot 				return -EINVAL;
2587a85cb24fSFrançois Tigeot 			}
2588a85cb24fSFrançois Tigeot 			if (!dev_priv->perf.oa.oa_formats[value].size) {
2589a85cb24fSFrançois Tigeot 				DRM_DEBUG("Invalid OA report format\n");
2590a85cb24fSFrançois Tigeot 				return -EINVAL;
2591a85cb24fSFrançois Tigeot 			}
2592a85cb24fSFrançois Tigeot 			props->oa_format = value;
2593a85cb24fSFrançois Tigeot 			break;
2594a85cb24fSFrançois Tigeot 		case DRM_I915_PERF_PROP_OA_EXPONENT:
2595a85cb24fSFrançois Tigeot 			if (value > OA_EXPONENT_MAX) {
2596a85cb24fSFrançois Tigeot 				DRM_DEBUG("OA timer exponent too high (> %u)\n",
2597a85cb24fSFrançois Tigeot 					 OA_EXPONENT_MAX);
2598a85cb24fSFrançois Tigeot 				return -EINVAL;
2599a85cb24fSFrançois Tigeot 			}
2600a85cb24fSFrançois Tigeot 
2601a85cb24fSFrançois Tigeot 			/* Theoretically we can program the OA unit to sample
2602*3f2dd94aSFrançois Tigeot 			 * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns
2603*3f2dd94aSFrançois Tigeot 			 * for BXT. We don't allow such high sampling
2604*3f2dd94aSFrançois Tigeot 			 * frequencies by default unless root.
2605a85cb24fSFrançois Tigeot 			 */
2606*3f2dd94aSFrançois Tigeot 
2607a85cb24fSFrançois Tigeot 			BUILD_BUG_ON(sizeof(oa_period) != 8);
2608*3f2dd94aSFrançois Tigeot 			oa_period = oa_exponent_to_ns(dev_priv, value);
2609a85cb24fSFrançois Tigeot 
2610a85cb24fSFrançois Tigeot 			/* This check is primarily to ensure that oa_period <=
2611a85cb24fSFrançois Tigeot 			 * UINT32_MAX (before passing to do_div which only
2612a85cb24fSFrançois Tigeot 			 * accepts a u32 denominator), but we can also skip
2613a85cb24fSFrançois Tigeot 			 * checking anything < 1Hz which implicitly can't be
2614a85cb24fSFrançois Tigeot 			 * limited via an integer oa_max_sample_rate.
2615a85cb24fSFrançois Tigeot 			 */
2616a85cb24fSFrançois Tigeot 			if (oa_period <= NSEC_PER_SEC) {
2617a85cb24fSFrançois Tigeot 				u64 tmp = NSEC_PER_SEC;
2618a85cb24fSFrançois Tigeot 				do_div(tmp, oa_period);
2619a85cb24fSFrançois Tigeot 				oa_freq_hz = tmp;
2620a85cb24fSFrançois Tigeot 			} else
2621a85cb24fSFrançois Tigeot 				oa_freq_hz = 0;
2622a85cb24fSFrançois Tigeot 
2623a85cb24fSFrançois Tigeot 			if (oa_freq_hz > i915_oa_max_sample_rate &&
2624a85cb24fSFrançois Tigeot 			    !capable(CAP_SYS_ADMIN)) {
2625a85cb24fSFrançois Tigeot 				DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n",
2626a85cb24fSFrançois Tigeot 					  i915_oa_max_sample_rate);
2627a85cb24fSFrançois Tigeot 				return -EACCES;
2628a85cb24fSFrançois Tigeot 			}
2629a85cb24fSFrançois Tigeot 
2630a85cb24fSFrançois Tigeot 			props->oa_periodic = true;
2631a85cb24fSFrançois Tigeot 			props->oa_period_exponent = value;
2632a85cb24fSFrançois Tigeot 			break;
2633a85cb24fSFrançois Tigeot 		case DRM_I915_PERF_PROP_MAX:
2634a85cb24fSFrançois Tigeot 			MISSING_CASE(id);
2635a85cb24fSFrançois Tigeot 			return -EINVAL;
2636a85cb24fSFrançois Tigeot 		}
2637a85cb24fSFrançois Tigeot 
2638a85cb24fSFrançois Tigeot 		uprop += 2;
2639a85cb24fSFrançois Tigeot 	}
2640a85cb24fSFrançois Tigeot 
2641a85cb24fSFrançois Tigeot 	return 0;
2642a85cb24fSFrançois Tigeot }
2643a85cb24fSFrançois Tigeot #endif
2644a85cb24fSFrançois Tigeot 
2645a85cb24fSFrançois Tigeot /**
2646a85cb24fSFrançois Tigeot  * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD
2647a85cb24fSFrançois Tigeot  * @dev: drm device
2648a85cb24fSFrançois Tigeot  * @data: ioctl data copied from userspace (unvalidated)
2649a85cb24fSFrançois Tigeot  * @file: drm file
2650a85cb24fSFrançois Tigeot  *
2651a85cb24fSFrançois Tigeot  * Validates the stream open parameters given by userspace including flags
2652a85cb24fSFrançois Tigeot  * and an array of u64 key, value pair properties.
2653a85cb24fSFrançois Tigeot  *
2654a85cb24fSFrançois Tigeot  * Very little is assumed up front about the nature of the stream being
2655a85cb24fSFrançois Tigeot  * opened (for instance we don't assume it's for periodic OA unit metrics). An
2656a85cb24fSFrançois Tigeot  * i915-perf stream is expected to be a suitable interface for other forms of
2657a85cb24fSFrançois Tigeot  * buffered data written by the GPU besides periodic OA metrics.
2658a85cb24fSFrançois Tigeot  *
2659a85cb24fSFrançois Tigeot  * Note we copy the properties from userspace outside of the i915 perf
2660a85cb24fSFrançois Tigeot  * mutex to avoid an awkward lockdep with mmap_sem.
2661a85cb24fSFrançois Tigeot  *
2662a85cb24fSFrançois Tigeot  * Most of the implementation details are handled by
2663a85cb24fSFrançois Tigeot  * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock
2664a85cb24fSFrançois Tigeot  * mutex for serializing with any non-file-operation driver hooks.
2665a85cb24fSFrançois Tigeot  *
2666a85cb24fSFrançois Tigeot  * Return: A newly opened i915 Perf stream file descriptor or negative
2667a85cb24fSFrançois Tigeot  * error code on failure.
2668a85cb24fSFrançois Tigeot  */
i915_perf_open_ioctl(struct drm_device * dev,void * data,struct drm_file * file)2669a85cb24fSFrançois Tigeot int i915_perf_open_ioctl(struct drm_device *dev, void *data,
2670a85cb24fSFrançois Tigeot 			 struct drm_file *file)
2671a85cb24fSFrançois Tigeot {
2672a85cb24fSFrançois Tigeot #if 0
2673a85cb24fSFrançois Tigeot 	struct drm_i915_private *dev_priv = dev->dev_private;
2674a85cb24fSFrançois Tigeot 	struct drm_i915_perf_open_param *param = data;
2675a85cb24fSFrançois Tigeot 	struct perf_open_properties props;
2676a85cb24fSFrançois Tigeot 	u32 known_open_flags;
2677a85cb24fSFrançois Tigeot 	int ret;
2678a85cb24fSFrançois Tigeot 
2679a85cb24fSFrançois Tigeot 	if (!dev_priv->perf.initialized) {
2680a85cb24fSFrançois Tigeot #endif
2681a85cb24fSFrançois Tigeot 		DRM_DEBUG("i915 perf interface not available for this system\n");
2682a85cb24fSFrançois Tigeot 		return -ENOTSUPP;
2683a85cb24fSFrançois Tigeot #if 0
2684a85cb24fSFrançois Tigeot 	}
2685a85cb24fSFrançois Tigeot 
2686a85cb24fSFrançois Tigeot 	known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
2687a85cb24fSFrançois Tigeot 			   I915_PERF_FLAG_FD_NONBLOCK |
2688a85cb24fSFrançois Tigeot 			   I915_PERF_FLAG_DISABLED;
2689a85cb24fSFrançois Tigeot 	if (param->flags & ~known_open_flags) {
2690a85cb24fSFrançois Tigeot 		DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n");
2691a85cb24fSFrançois Tigeot 		return -EINVAL;
2692a85cb24fSFrançois Tigeot 	}
2693a85cb24fSFrançois Tigeot 
2694a85cb24fSFrançois Tigeot 	ret = read_properties_unlocked(dev_priv,
2695a85cb24fSFrançois Tigeot 				       u64_to_user_ptr(param->properties_ptr),
2696a85cb24fSFrançois Tigeot 				       param->num_properties,
2697a85cb24fSFrançois Tigeot 				       &props);
2698a85cb24fSFrançois Tigeot 	if (ret)
2699a85cb24fSFrançois Tigeot 		return ret;
2700a85cb24fSFrançois Tigeot 
2701a85cb24fSFrançois Tigeot 	mutex_lock(&dev_priv->perf.lock);
2702a85cb24fSFrançois Tigeot 	ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file);
2703a85cb24fSFrançois Tigeot 	mutex_unlock(&dev_priv->perf.lock);
2704a85cb24fSFrançois Tigeot 
2705a85cb24fSFrançois Tigeot 	return ret;
2706a85cb24fSFrançois Tigeot #endif
2707a85cb24fSFrançois Tigeot }
2708a85cb24fSFrançois Tigeot 
/**
 * i915_perf_register - exposes i915-perf to userspace
 * @dev_priv: i915 device instance
 *
 * The whole body is compiled out in this port, so calling this function
 * currently has no effect.
 *
 * The disabled reference implementation advertises OA metric sets under a
 * sysfs metrics/ directory so userspace can enumerate the IDs usable for
 * opening an i915-perf stream: it creates the "metrics" kobject, loads the
 * per-platform test configuration and registers its sysfs group. If no test
 * configuration was loaded, or the group cannot be created, the kobject is
 * released again and perf.metrics_kobj is reset to NULL.
 */
void i915_perf_register(struct drm_i915_private *dev_priv)
{
#if 0
	if (!dev_priv->perf.initialized)
		return;

	/* To be sure we're synchronized with an attempted
	 * i915_perf_open_ioctl(); considering that we register after
	 * being exposed to userspace.
	 */
	mutex_lock(&dev_priv->perf.lock);

	dev_priv->perf.metrics_kobj =
		kobject_create_and_add("metrics",
				       &dev_priv->drm.primary->kdev->kobj);
	if (!dev_priv->perf.metrics_kobj)
		goto unlock;

	sysfs_attr_init(&dev_priv->perf.oa.test_config.sysfs_metric_id.attr);

	/* Pick the test configuration matching the running platform. */
	if (IS_HASWELL(dev_priv)) {
		i915_perf_load_test_config_hsw(dev_priv);
	} else if (IS_BROADWELL(dev_priv)) {
		i915_perf_load_test_config_bdw(dev_priv);
	} else if (IS_CHERRYVIEW(dev_priv)) {
		i915_perf_load_test_config_chv(dev_priv);
	} else if (IS_SKYLAKE(dev_priv)) {
		if (IS_SKL_GT2(dev_priv)) {
			i915_perf_load_test_config_sklgt2(dev_priv);
		} else if (IS_SKL_GT3(dev_priv)) {
			i915_perf_load_test_config_sklgt3(dev_priv);
		} else if (IS_SKL_GT4(dev_priv)) {
			i915_perf_load_test_config_sklgt4(dev_priv);
		}
	} else if (IS_BROXTON(dev_priv)) {
		i915_perf_load_test_config_bxt(dev_priv);
	} else if (IS_KABYLAKE(dev_priv)) {
		if (IS_KBL_GT2(dev_priv)) {
			i915_perf_load_test_config_kblgt2(dev_priv);
		} else if (IS_KBL_GT3(dev_priv)) {
			i915_perf_load_test_config_kblgt3(dev_priv);
		}
	} else if (IS_GEMINILAKE(dev_priv)) {
		i915_perf_load_test_config_glk(dev_priv);
	} else if (IS_COFFEELAKE(dev_priv)) {
		if (IS_CFL_GT2(dev_priv)) {
			i915_perf_load_test_config_cflgt2(dev_priv);
		}
	}

	/* No test config for this platform, or sysfs refused the group:
	 * drop the kobject again. The group is only created when a config
	 * was actually loaded (id != 0).
	 */
	if (dev_priv->perf.oa.test_config.id == 0 ||
	    sysfs_create_group(dev_priv->perf.metrics_kobj,
			       &dev_priv->perf.oa.test_config.sysfs_metric)) {
		kobject_put(dev_priv->perf.metrics_kobj);
		dev_priv->perf.metrics_kobj = NULL;
		goto unlock;
	}

	atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1);

unlock:
	mutex_unlock(&dev_priv->perf.lock);
#endif
}
2786a85cb24fSFrançois Tigeot 
2787a85cb24fSFrançois Tigeot /**
2788a85cb24fSFrançois Tigeot  * i915_perf_unregister - hide i915-perf from userspace
2789a85cb24fSFrançois Tigeot  * @dev_priv: i915 device instance
2790a85cb24fSFrançois Tigeot  *
2791a85cb24fSFrançois Tigeot  * i915-perf state cleanup is split up into an 'unregister' and
2792a85cb24fSFrançois Tigeot  * 'deinit' phase where the interface is first hidden from
2793a85cb24fSFrançois Tigeot  * userspace by i915_perf_unregister() before cleaning up
2794a85cb24fSFrançois Tigeot  * remaining state in i915_perf_fini().
2795a85cb24fSFrançois Tigeot  */
i915_perf_unregister(struct drm_i915_private * dev_priv)2796a85cb24fSFrançois Tigeot void i915_perf_unregister(struct drm_i915_private *dev_priv)
2797a85cb24fSFrançois Tigeot {
2798a85cb24fSFrançois Tigeot 	if (!IS_HASWELL(dev_priv))
2799a85cb24fSFrançois Tigeot 		return;
2800a85cb24fSFrançois Tigeot 
2801a85cb24fSFrançois Tigeot 	if (!dev_priv->perf.metrics_kobj)
2802a85cb24fSFrançois Tigeot 		return;
2803a85cb24fSFrançois Tigeot 
2804a85cb24fSFrançois Tigeot #if 0
2805a85cb24fSFrançois Tigeot 	i915_perf_unregister_sysfs_hsw(dev_priv);
2806a85cb24fSFrançois Tigeot 
2807a85cb24fSFrançois Tigeot 	kobject_put(dev_priv->perf.metrics_kobj);
2808a85cb24fSFrançois Tigeot 	dev_priv->perf.metrics_kobj = NULL;
2809a85cb24fSFrançois Tigeot #endif
2810a85cb24fSFrançois Tigeot }
2811a85cb24fSFrançois Tigeot 
#if 0
/*
 * Disabled sysctl tables for the i915 perf tunables. The three tables nest
 * as dev -> i915 -> { perf_stream_paranoid, oa_max_sample_rate }; dev_root
 * is the table handed to register_sysctl_table() by the (also disabled)
 * code in i915_perf_init().
 */
static struct ctl_table oa_table[] = {
	{
		.procname = "perf_stream_paranoid",
		.data = &i915_perf_stream_paranoid,
		.maxlen = sizeof(i915_perf_stream_paranoid),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &zero,
		.extra2 = &one,
	},
	{
		.procname = "oa_max_sample_rate",
		.data = &i915_oa_max_sample_rate,
		.maxlen = sizeof(i915_oa_max_sample_rate),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &zero,
		.extra2 = &oa_sample_rate_hard_limit,
	},
	{}
};

static struct ctl_table i915_root[] = {
	{
		.procname = "i915",
		.maxlen = 0,
		.mode = 0555,
		.child = oa_table,
	},
	{}
};

static struct ctl_table dev_root[] = {
	{
		.procname = "dev",
		.maxlen = 0,
		.mode = 0555,
		.child = i915_root,
	},
	{}
};
#endif
2855a85cb24fSFrançois Tigeot 
2856a85cb24fSFrançois Tigeot /**
2857a85cb24fSFrançois Tigeot  * i915_perf_init - initialize i915-perf state on module load
2858a85cb24fSFrançois Tigeot  * @dev_priv: i915 device instance
2859a85cb24fSFrançois Tigeot  *
2860a85cb24fSFrançois Tigeot  * Initializes i915-perf state without exposing anything to userspace.
2861a85cb24fSFrançois Tigeot  *
2862a85cb24fSFrançois Tigeot  * Note: i915-perf initialization is split into an 'init' and 'register'
2863a85cb24fSFrançois Tigeot  * phase with the i915_perf_register() exposing state to userspace.
2864a85cb24fSFrançois Tigeot  */
i915_perf_init(struct drm_i915_private * dev_priv)2865a85cb24fSFrançois Tigeot void i915_perf_init(struct drm_i915_private *dev_priv)
2866a85cb24fSFrançois Tigeot {
2867*3f2dd94aSFrançois Tigeot 	dev_priv->perf.oa.timestamp_frequency = 0;
2868a85cb24fSFrançois Tigeot 
2869a85cb24fSFrançois Tigeot #if 0
2870*3f2dd94aSFrançois Tigeot 	if (IS_HASWELL(dev_priv)) {
2871*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.is_valid_b_counter_reg =
2872*3f2dd94aSFrançois Tigeot 			gen7_is_valid_b_counter_addr;
2873*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.is_valid_mux_reg =
2874*3f2dd94aSFrançois Tigeot 			hsw_is_valid_mux_addr;
2875*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.is_valid_flex_reg = NULL;
2876*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
2877*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
2878*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
2879*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
2880*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
2881*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.read = gen7_oa_read;
2882*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_hw_tail_read =
2883*3f2dd94aSFrançois Tigeot 			gen7_oa_hw_tail_read;
2884*3f2dd94aSFrançois Tigeot 
2885*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.timestamp_frequency = 12500000;
2886*3f2dd94aSFrançois Tigeot 
2887*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
2888*3f2dd94aSFrançois Tigeot 	} else if (i915_modparams.enable_execlists) {
2889*3f2dd94aSFrançois Tigeot 		/* Note: that although we could theoretically also support the
2890*3f2dd94aSFrançois Tigeot 		 * legacy ringbuffer mode on BDW (and earlier iterations of
2891*3f2dd94aSFrançois Tigeot 		 * this driver, before upstreaming did this) it didn't seem
2892*3f2dd94aSFrançois Tigeot 		 * worth the complexity to maintain now that BDW+ enable
2893*3f2dd94aSFrançois Tigeot 		 * execlist mode by default.
2894*3f2dd94aSFrançois Tigeot 		 */
2895*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.is_valid_b_counter_reg =
2896*3f2dd94aSFrançois Tigeot 			gen7_is_valid_b_counter_addr;
2897*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.is_valid_mux_reg =
2898*3f2dd94aSFrançois Tigeot 			gen8_is_valid_mux_addr;
2899*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.is_valid_flex_reg =
2900*3f2dd94aSFrançois Tigeot 			gen8_is_valid_flex_addr;
2901*3f2dd94aSFrançois Tigeot 
2902*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.init_oa_buffer = gen8_init_oa_buffer;
2903*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.enable_metric_set = gen8_enable_metric_set;
2904*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.disable_metric_set = gen8_disable_metric_set;
2905*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_enable = gen8_oa_enable;
2906*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_disable = gen8_oa_disable;
2907*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.read = gen8_oa_read;
2908*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
2909*3f2dd94aSFrançois Tigeot 
2910*3f2dd94aSFrançois Tigeot 		dev_priv->perf.oa.oa_formats = gen8_plus_oa_formats;
2911*3f2dd94aSFrançois Tigeot 
2912*3f2dd94aSFrançois Tigeot 		if (IS_GEN8(dev_priv)) {
2913*3f2dd94aSFrançois Tigeot 			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
2914*3f2dd94aSFrançois Tigeot 			dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
2915*3f2dd94aSFrançois Tigeot 
2916*3f2dd94aSFrançois Tigeot 			dev_priv->perf.oa.timestamp_frequency = 12500000;
2917*3f2dd94aSFrançois Tigeot 
2918*3f2dd94aSFrançois Tigeot 			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
2919*3f2dd94aSFrançois Tigeot 			if (IS_CHERRYVIEW(dev_priv)) {
2920*3f2dd94aSFrançois Tigeot 				dev_priv->perf.oa.ops.is_valid_mux_reg =
2921*3f2dd94aSFrançois Tigeot 					chv_is_valid_mux_addr;
2922*3f2dd94aSFrançois Tigeot 			}
2923*3f2dd94aSFrançois Tigeot 		} else if (IS_GEN9(dev_priv)) {
2924*3f2dd94aSFrançois Tigeot 			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
2925*3f2dd94aSFrançois Tigeot 			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
2926*3f2dd94aSFrançois Tigeot 
2927*3f2dd94aSFrançois Tigeot 			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
2928*3f2dd94aSFrançois Tigeot 
2929*3f2dd94aSFrançois Tigeot 			switch (dev_priv->info.platform) {
2930*3f2dd94aSFrançois Tigeot 			case INTEL_BROXTON:
2931*3f2dd94aSFrançois Tigeot 			case INTEL_GEMINILAKE:
2932*3f2dd94aSFrançois Tigeot 				dev_priv->perf.oa.timestamp_frequency = 19200000;
2933*3f2dd94aSFrançois Tigeot 				break;
2934*3f2dd94aSFrançois Tigeot 			case INTEL_SKYLAKE:
2935*3f2dd94aSFrançois Tigeot 			case INTEL_KABYLAKE:
2936*3f2dd94aSFrançois Tigeot 			case INTEL_COFFEELAKE:
2937*3f2dd94aSFrançois Tigeot 				dev_priv->perf.oa.timestamp_frequency = 12000000;
2938*3f2dd94aSFrançois Tigeot 				break;
2939*3f2dd94aSFrançois Tigeot 			default:
2940*3f2dd94aSFrançois Tigeot 				/* Leave timestamp_frequency to 0 so we can
2941*3f2dd94aSFrançois Tigeot 				 * detect unsupported platforms.
2942*3f2dd94aSFrançois Tigeot 				 */
2943*3f2dd94aSFrançois Tigeot 				break;
2944*3f2dd94aSFrançois Tigeot 			}
2945*3f2dd94aSFrançois Tigeot 		}
2946*3f2dd94aSFrançois Tigeot 	}
2947*3f2dd94aSFrançois Tigeot 
2948*3f2dd94aSFrançois Tigeot 	if (dev_priv->perf.oa.timestamp_frequency) {
2949a85cb24fSFrançois Tigeot 		hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
2950a85cb24fSFrançois Tigeot 				CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2951a85cb24fSFrançois Tigeot 		dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
2952a85cb24fSFrançois Tigeot 		init_waitqueue_head(&dev_priv->perf.oa.poll_wq);
2953a85cb24fSFrançois Tigeot 
2954a85cb24fSFrançois Tigeot 		INIT_LIST_HEAD(&dev_priv->perf.streams);
2955a85cb24fSFrançois Tigeot 		mutex_init(&dev_priv->perf.lock);
2956*3f2dd94aSFrançois Tigeot 		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
2957a85cb24fSFrançois Tigeot 
2958*3f2dd94aSFrançois Tigeot 		oa_sample_rate_hard_limit =
2959*3f2dd94aSFrançois Tigeot 			dev_priv->perf.oa.timestamp_frequency / 2;
2960a85cb24fSFrançois Tigeot 		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
2961*3f2dd94aSFrançois Tigeot 
2962*3f2dd94aSFrançois Tigeot 		mutex_init(&dev_priv->perf.metrics_lock);
2963*3f2dd94aSFrançois Tigeot 		idr_init(&dev_priv->perf.metrics_idr);
2964a85cb24fSFrançois Tigeot 
2965a85cb24fSFrançois Tigeot 		dev_priv->perf.initialized = true;
2966a85cb24fSFrançois Tigeot 	}
2967*3f2dd94aSFrançois Tigeot #endif
2968*3f2dd94aSFrançois Tigeot }
2969a85cb24fSFrançois Tigeot 
2970a85cb24fSFrançois Tigeot /**
2971a85cb24fSFrançois Tigeot  * i915_perf_fini - Counter part to i915_perf_init()
2972a85cb24fSFrançois Tigeot  * @dev_priv: i915 device instance
2973a85cb24fSFrançois Tigeot  */
i915_perf_fini(struct drm_i915_private * dev_priv)2974a85cb24fSFrançois Tigeot void i915_perf_fini(struct drm_i915_private *dev_priv)
2975a85cb24fSFrançois Tigeot {
2976a85cb24fSFrançois Tigeot 	if (!dev_priv->perf.initialized)
2977a85cb24fSFrançois Tigeot 		return;
2978a85cb24fSFrançois Tigeot 
2979a85cb24fSFrançois Tigeot #if 0
2980a85cb24fSFrançois Tigeot 	unregister_sysctl_table(dev_priv->perf.sysctl_header);
2981a85cb24fSFrançois Tigeot 
2982a85cb24fSFrançois Tigeot 	memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));
2983*3f2dd94aSFrançois Tigeot 
2984a85cb24fSFrançois Tigeot #endif
2985a85cb24fSFrançois Tigeot 	dev_priv->perf.initialized = false;
2986a85cb24fSFrançois Tigeot }
2987