Lines Matching defs:perf
35 * This i915 perf interface enables userspace to configure and open a file
56 * i915 perf file descriptors represent a "stream" instead of an "event"; where
57 * a perf event primarily corresponds to a single 64bit value, while a stream
61 * of related counters. Samples for an i915 perf stream capturing OA metrics
68 * i915 perf stream configurations are provided as an array of u64 (key,value)
72 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
85 * The first prototype of this driver was based on the core perf
87 * perf, we found we were breaking or working around too many assumptions baked
88 * into perf's currently cpu centric design.
90 * In the end we didn't see a clear benefit to making perf's implementation and
92 * wouldn't be able to use any existing perf based userspace tools.
95 * how userspace will sometimes need to combine i915 perf OA metrics with
99 * a standard vendor/architecture agnostic interface by not using perf.
102 * For posterity, in case we might re-visit trying to adapt core perf to be
106 * - The perf based OA PMU driver broke some significant design assumptions:
108 * Existing perf pmus are used for profiling work on a cpu and we were
111 * registers) to fit with perf's current design, and adding _DEVICE records
118 * buffer to perf's buffer, those bursts of sample writes looked to perf like
128 * set while perf generally expects counter configurations to be orthogonal.
133 * GPU context to filter metrics on). We avoided using perf's grouping
134 * feature and forwarded OA reports to userspace via perf's 'raw' sample
142 * - As a side note on perf's grouping feature; there was also some concern
156 * event scheduling is a central design idea within perf for allowing
178 * - It felt like our perf based PMU was making some technical compromises
179 * just for the sake of using perf:
185 * cpu id, perf ensures pmu methods will be invoked via an inter process
187 * perf events for a specific cpu. This was workable but it meant the
308 * The default threshold of 100000Hz is based on perf's similar
411 i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
416 oa_config = idr_find(&perf->metrics_idr, metrics_set);
475 (GRAPHICS_VER(stream->perf->i915) == 12 ?
491 stream->perf->gen8_valid_ctx_bit);
565 hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
608 __ratelimit(&stream->perf->tail_pointer_race))
627 * @stream: An i915-perf stream opened for OA metrics
661 * @stream: An i915-perf stream opened for OA metrics
722 * @stream: An i915-perf stream opened for OA metrics
926 oaheadptr = GRAPHICS_VER(stream->perf->i915) == 12 ?
949 * @stream: An i915-perf stream opened for OA metrics
980 oastatus_reg = GRAPHICS_VER(stream->perf->i915) == 12 ?
1006 drm_dbg(&stream->perf->i915->drm,
1010 stream->perf->ops.oa_disable(stream);
1011 stream->perf->ops.oa_enable(stream);
1040 * @stream: An i915-perf stream opened for OA metrics
1131 if (__ratelimit(&stream->perf->spurious_report_rs))
1169 * @stream: An i915-perf stream opened for OA metrics
1202 oastatus1 &= ~stream->perf->gen7_latched_oastatus1;
1230 drm_dbg(&stream->perf->i915->drm,
1234 stream->perf->ops.oa_disable(stream);
1235 stream->perf->ops.oa_enable(stream);
1245 stream->perf->gen7_latched_oastatus1 |=
1256 * @stream: An i915-perf stream opened for OA metrics
1280 * @stream: An i915-perf stream opened for OA metrics
1281 * @file: An i915 perf stream file
1284 * For handling userspace polling on an i915 perf stream opened for OA metrics,
1297 * @stream: An i915-perf stream opened for OA metrics
1312 return stream->perf->ops.read(stream, buf, count, offset);
1526 struct i915_perf *perf = &ce->engine->i915->perf;
1527 u32 offset = perf->ctx_oactxctrl_offset;
1534 perf->ctx_oactxctrl_offset = offset;
1551 * @stream: An i915-perf stream opened for OA metrics
1569 HAS_LOGICAL_RING_CONTEXTS(stream->perf->i915)) {
1571 * We are enabling perf query here. If we don't find the context
1577 drm_err(&stream->perf->i915->drm,
1578 "Enabling perf query failed for %s\n",
1634 drm_dbg(&stream->perf->i915->drm,
1644 * @stream: An i915-perf stream opened for OA metrics
1704 struct i915_perf *perf = stream->perf;
1718 perf->ops.disable_metric_set(stream);
1738 if (perf->spurious_report_rs.missed) {
1741 perf->spurious_report_rs.missed);
1776 stream->perf->gen7_latched_oastatus1 = 0;
1903 struct drm_i915_private *i915 = stream->perf->i915;
1915 bo = i915_gem_object_create_shmem(stream->perf->i915, OA_BUFFER_SIZE);
1972 if (GRAPHICS_VER(stream->perf->i915) >= 8)
1987 struct drm_i915_private *i915 = stream->perf->i915;
1992 intel_gt_ns_to_clock_interval(to_gt(stream->perf->i915),
1993 atomic64_read(&stream->perf->noa_programming_delay));
2033 * needs to be fixed during the lifetime of the i915/perf stream.
2264 obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
2293 *cs++ = (GRAPHICS_VER(stream->perf->i915) < 8 ?
2502 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
2503 u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2667 u32 offset = stream->perf->ctx_oactxctrl_offset;
2742 struct drm_i915_private *i915 = stream->perf->i915;
2748 lockdep_assert_held(&gt->perf.lock);
2811 u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
2813 const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
2879 if (IS_GRAPHICS_VER(stream->perf->i915, 9, 11)) {
2910 struct drm_i915_private *i915 = stream->perf->i915;
2995 struct drm_i915_private *i915 = stream->perf->i915;
3104 * @stream: An i915 perf stream opened for OA metrics
3115 stream->perf->ops.oa_enable(stream);
3133 drm_err(&stream->perf->i915->drm,
3145 drm_err(&stream->perf->i915->drm,
3158 drm_err(&stream->perf->i915->drm,
3166 drm_err(&stream->perf->i915->drm,
3174 * @stream: An i915 perf stream opened for OA metrics
3182 stream->perf->ops.oa_disable(stream);
3206 err = stream->perf->ops.enable_metric_set(stream, active);
3278 * @stream: An i915 perf stream
3298 struct drm_i915_private *i915 = stream->perf->i915;
3299 struct i915_perf *perf = stream->perf;
3305 drm_dbg(&stream->perf->i915->drm,
3317 if (!perf->metrics_kobj) {
3318 drm_dbg(&stream->perf->i915->drm,
3324 (GRAPHICS_VER(perf->i915) < 12 || !stream->ctx)) {
3325 drm_dbg(&stream->perf->i915->drm,
3330 if (!perf->ops.enable_metric_set) {
3331 drm_dbg(&stream->perf->i915->drm,
3342 drm_dbg(&stream->perf->i915->drm,
3348 drm_dbg(&stream->perf->i915->drm,
3358 stream->oa_buffer.format = &perf->oa_formats[props->oa_format];
3374 drm_dbg(&stream->perf->i915->drm,
3382 drm_dbg(&stream->perf->i915->drm,
3387 stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
3389 drm_dbg(&stream->perf->i915->drm,
3421 drm_dbg(&stream->perf->i915->drm,
3435 stream->engine->gt->perf.sseu = props->sseu;
3440 drm_dbg(&stream->perf->i915->drm,
3445 drm_dbg(&stream->perf->i915->drm,
3460 perf->ops.disable_metric_set(stream);
3494 /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
3496 if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
3503 * i915_perf_read - handles read() FOP for i915 perf stream FDs
3504 * @file: An i915 perf stream file
3595 * @stream: An i915 perf stream
3596 * @file: An i915 perf stream file
3599 * For handling userspace polling on an i915 perf stream, this calls through to
3627 * @file: An i915 perf stream file
3630 * For handling userspace polling on an i915 perf stream, this ensures
3652 * @stream: A disabled i915 perf stream
3677 * @stream: An enabled i915 perf stream
3710 config = i915_perf_get_oa_config(stream->perf, metrics_set);
3739 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
3740 * @stream: An i915 perf stream
3766 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
3767 * @file: An i915 perf stream file
3791 * i915_perf_destroy_locked - destroy an i915 perf stream
3792 * @stream: An i915 perf stream
3794 * Frees all resources associated with the given i915 perf @stream, disabling
3797 * Note: The &gt->perf.lock mutex has been taken to serialize
3817 * @file: An i915 perf stream file
3819 * Cleans up any resources associated with an open i915 perf stream file.
3828 struct i915_perf *perf = stream->perf;
3833 * other user of stream->lock. Use the perf lock to destroy the stream
3836 mutex_lock(&gt->perf.lock);
3838 mutex_unlock(&gt->perf.lock);
3840 /* Release the reference the perf stream kept on the driver. */
3841 drm_dev_put(&perf->i915->drm);
3864 * @perf: i915 perf instance
3872 * behalf of i915_perf_open_ioctl() with the &gt->perf.lock mutex
3887 i915_perf_open_ioctl_locked(struct i915_perf *perf,
3908 drm_dbg(&perf->i915->drm,
3909 "Failed to look up context with ID %u for opening perf stream\n",
3935 if (IS_HASWELL(perf->i915) && specific_ctx)
3937 else if (GRAPHICS_VER(perf->i915) == 12 && specific_ctx &&
3943 drm_dbg(&perf->i915->drm,
3959 /* Similar to perf's kernel.perf_paranoid_cpu sysctl option
3966 drm_dbg(&perf->i915->drm,
3967 "Insufficient privileges to open i915 perf stream\n");
3978 stream->perf = perf;
4012 drm_dev_get(&perf->i915->drm);
4029 static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
4032 u32 den = i915_perf_oa_timestamp_frequency(perf->i915);
4038 oa_format_valid(struct i915_perf *perf, enum drm_i915_oa_format format)
4040 return test_bit(format, perf->format_mask);
4044 oa_format_add(struct i915_perf *perf, enum drm_i915_oa_format format)
4046 __set_bit(format, perf->format_mask);
4051 * @perf: i915 perf instance
4064 static int read_properties_unlocked(struct i915_perf *perf,
4089 drm_dbg(&perf->i915->drm,
4090 "Invalid number of i915 perf properties given\n");
4111 drm_dbg(&perf->i915->drm,
4112 "Unknown i915 perf property ID\n");
4127 drm_dbg(&perf->i915->drm,
4135 drm_dbg(&perf->i915->drm,
4140 if (!oa_format_valid(perf, value)) {
4141 drm_dbg(&perf->i915->drm,
4150 drm_dbg(&perf->i915->drm,
4163 oa_period = oa_exponent_to_ns(perf, value);
4179 drm_dbg(&perf->i915->drm,
4192 if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) {
4193 drm_dbg(&perf->i915->drm,
4195 GRAPHICS_VER_FULL(perf->i915));
4202 drm_dbg(&perf->i915->drm,
4211 drm_dbg(&perf->i915->drm,
4236 drm_dbg(&perf->i915->drm,
4241 props->engine = intel_engine_lookup_user(perf->i915, class, instance);
4243 drm_dbg(&perf->i915->drm,
4250 drm_dbg(&perf->i915->drm,
4264 drm_dbg(&perf->i915->drm,
4270 f = &perf->oa_formats[i];
4272 drm_dbg(&perf->i915->drm,
4281 drm_dbg(&perf->i915->drm,
4302 * i915-perf stream is expected to be a suitable interface for other forms of
4305 * Note we copy the properties from userspace outside of the i915 perf
4309 * i915_perf_open_ioctl_locked() after taking the &gt->perf.lock
4318 struct i915_perf *perf = &to_i915(dev)->perf;
4325 if (!perf->i915)
4332 drm_dbg(&perf->i915->drm,
4337 ret = read_properties_unlocked(perf,
4346 mutex_lock(&gt->perf.lock);
4347 ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
4348 mutex_unlock(&gt->perf.lock);
4354 * i915_perf_register - exposes i915-perf to userspace
4359 * used to open an i915-perf stream.
4364 struct i915_perf *perf = &i915->perf;
4367 if (!perf->i915)
4374 mutex_lock(&gt->perf.lock);
4376 perf->metrics_kobj =
4380 mutex_unlock(&gt->perf.lock);
4385 * i915_perf_unregister - hide i915-perf from userspace
4388 * i915-perf state cleanup is split up into an 'unregister' and
4395 struct i915_perf *perf = &i915->perf;
4397 if (!perf->metrics_kobj)
4400 kobject_put(perf->metrics_kobj);
4401 perf->metrics_kobj = NULL;
4404 static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr)
4523 static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
4528 static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4534 static bool gen11_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4541 static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4547 static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4553 static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
4558 static bool mtl_is_valid_oam_b_counter_addr(struct i915_perf *perf, u32 addr)
4560 if (HAS_OAM(perf->i915) &&
4561 GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70))
4567 static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
4571 mtl_is_valid_oam_b_counter_addr(perf, addr);
4574 static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
4576 if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70))
4603 static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf,
4604 bool (*is_valid)(struct i915_perf *perf, u32 addr),
4631 if (!is_valid(perf, addr)) {
4632 drm_dbg(&perf->i915->drm,
4665 static int create_dynamic_oa_sysfs_entry(struct i915_perf *perf,
4680 return sysfs_create_group(perf->metrics_kobj,
4696 * Returns: A new allocated config number to be used with the perf open ioctl
4705 struct i915_perf *perf = &to_i915(dev)->perf;
4711 if (!perf->i915)
4714 if (!perf->metrics_kobj) {
4715 drm_dbg(&perf->i915->drm,
4721 drm_dbg(&perf->i915->drm,
4729 drm_dbg(&perf->i915->drm,
4736 drm_dbg(&perf->i915->drm,
4741 oa_config->perf = perf;
4745 drm_dbg(&perf->i915->drm,
4757 regs = alloc_oa_regs(perf,
4758 perf->ops.is_valid_mux_reg,
4763 drm_dbg(&perf->i915->drm,
4771 regs = alloc_oa_regs(perf,
4772 perf->ops.is_valid_b_counter_reg,
4777 drm_dbg(&perf->i915->drm,
4784 if (GRAPHICS_VER(perf->i915) < 8) {
4791 regs = alloc_oa_regs(perf,
4792 perf->ops.is_valid_flex_reg,
4797 drm_dbg(&perf->i915->drm,
4805 err = mutex_lock_interruptible(&perf->metrics_lock);
4812 idr_for_each_entry(&perf->metrics_idr, tmp, id) {
4814 drm_dbg(&perf->i915->drm,
4821 err = create_dynamic_oa_sysfs_entry(perf, oa_config);
4823 drm_dbg(&perf->i915->drm,
4829 oa_config->id = idr_alloc(&perf->metrics_idr,
4833 drm_dbg(&perf->i915->drm,
4840 drm_dbg(&perf->i915->drm,
4842 mutex_unlock(&perf->metrics_lock);
4847 mutex_unlock(&perf->metrics_lock);
4850 drm_dbg(&perf->i915->drm,
4870 struct i915_perf *perf = &to_i915(dev)->perf;
4875 if (!perf->i915)
4879 drm_dbg(&perf->i915->drm,
4884 ret = mutex_lock_interruptible(&perf->metrics_lock);
4888 oa_config = idr_find(&perf->metrics_idr, *arg);
4890 drm_dbg(&perf->i915->drm,
4898 sysfs_remove_group(perf->metrics_kobj, &oa_config->sysfs_metric);
4900 idr_remove(&perf->metrics_idr, *arg);
4902 mutex_unlock(&perf->metrics_lock);
4904 drm_dbg(&perf->i915->drm,
4912 mutex_unlock(&perf->metrics_lock);
5008 int i, num_groups = gt->perf.num_perf_groups;
5011 struct i915_perf_group *g = &gt->perf.group[i];
5048 gt->perf.num_perf_groups = num_groups;
5049 gt->perf.group = g;
5056 static int oa_init_engine_groups(struct i915_perf *perf)
5061 for_each_gt(gt, perf->i915, i) {
5070 static void oa_init_supported_formats(struct i915_perf *perf)
5072 struct drm_i915_private *i915 = perf->i915;
5077 oa_format_add(perf, I915_OA_FORMAT_A13);
5078 oa_format_add(perf, I915_OA_FORMAT_A13);
5079 oa_format_add(perf, I915_OA_FORMAT_A29);
5080 oa_format_add(perf, I915_OA_FORMAT_A13_B8_C8);
5081 oa_format_add(perf, I915_OA_FORMAT_B4_C8);
5082 oa_format_add(perf, I915_OA_FORMAT_A45_B8_C8);
5083 oa_format_add(perf, I915_OA_FORMAT_B4_C8_A16);
5084 oa_format_add(perf, I915_OA_FORMAT_C4_B8);
5103 oa_format_add(perf, I915_OA_FORMAT_A12);
5104 oa_format_add(perf, I915_OA_FORMAT_A12_B8_C8);
5105 oa_format_add(perf, I915_OA_FORMAT_A32u40_A4u32_B8_C8);
5106 oa_format_add(perf, I915_OA_FORMAT_C4_B8);
5110 oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
5111 oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
5115 oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
5116 oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
5117 oa_format_add(perf, I915_OAM_FORMAT_MPEC8u64_B8_C8);
5118 oa_format_add(perf, I915_OAM_FORMAT_MPEC8u32_B8_C8);
5128 struct i915_perf *perf = &i915->perf;
5132 perf->ctx_oactxctrl_offset = 0x120;
5133 perf->ctx_flexeu0_offset = 0x2ce;
5134 perf->gen8_valid_ctx_bit = BIT(25);
5137 perf->ctx_oactxctrl_offset = 0x128;
5138 perf->ctx_flexeu0_offset = 0x3de;
5139 perf->gen8_valid_ctx_bit = BIT(16);
5142 perf->ctx_oactxctrl_offset = 0x124;
5143 perf->ctx_flexeu0_offset = 0x78e;
5144 perf->gen8_valid_ctx_bit = BIT(16);
5147 perf->gen8_valid_ctx_bit = BIT(16);
5150 * cache the value in perf->ctx_oactxctrl_offset.
5159 * i915_perf_init - initialize i915-perf state on module bind
5162 * Initializes i915-perf state without exposing anything to userspace.
5164 * Note: i915-perf initialization is split into an 'init' and 'register'
5169 struct i915_perf *perf = &i915->perf;
5171 perf->oa_formats = oa_formats;
5173 perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
5174 perf->ops.is_valid_mux_reg = hsw_is_valid_mux_addr;
5175 perf->ops.is_valid_flex_reg = NULL;
5176 perf->ops.enable_metric_set = hsw_enable_metric_set;
5177 perf->ops.disable_metric_set = hsw_disable_metric_set;
5178 perf->ops.oa_enable = gen7_oa_enable;
5179 perf->ops.oa_disable = gen7_oa_disable;
5180 perf->ops.read = gen7_oa_read;
5181 perf->ops.oa_hw_tail_read = gen7_oa_hw_tail_read;
5189 perf->ops.read = gen8_oa_read;
5193 perf->ops.is_valid_b_counter_reg =
5195 perf->ops.is_valid_mux_reg =
5197 perf->ops.is_valid_flex_reg =
5201 perf->ops.is_valid_mux_reg =
5205 perf->ops.oa_enable = gen8_oa_enable;
5206 perf->ops.oa_disable = gen8_oa_disable;
5207 perf->ops.enable_metric_set = gen8_enable_metric_set;
5208 perf->ops.disable_metric_set = gen8_disable_metric_set;
5209 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
5211 perf->ops.is_valid_b_counter_reg =
5213 perf->ops.is_valid_mux_reg =
5215 perf->ops.is_valid_flex_reg =
5218 perf->ops.oa_enable = gen8_oa_enable;
5219 perf->ops.oa_disable = gen8_oa_disable;
5220 perf->ops.enable_metric_set = gen8_enable_metric_set;
5221 perf->ops.disable_metric_set = gen11_disable_metric_set;
5222 perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
5224 perf->ops.is_valid_b_counter_reg =
5228 perf->ops.is_valid_mux_reg =
5230 perf->ops.is_valid_flex_reg =
5233 perf->ops.oa_enable = gen12_oa_enable;
5234 perf->ops.oa_disable = gen12_oa_disable;
5235 perf->ops.enable_metric_set = gen12_enable_metric_set;
5236 perf->ops.disable_metric_set = gen12_disable_metric_set;
5237 perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
5241 if (perf->ops.enable_metric_set) {
5246 rw_init(&gt->perf.lock, "perflk");
5251 rw_init(&perf->metrics_lock, "metricslk");
5252 idr_init_base(&perf->metrics_idr, 1);
5264 ratelimit_state_init(&perf->spurious_report_rs, 5 * HZ, 10);
5269 ratelimit_set_flags(&perf->spurious_report_rs,
5272 ratelimit_state_init(&perf->tail_pointer_race,
5274 ratelimit_set_flags(&perf->tail_pointer_race,
5277 atomic64_set(&perf->noa_programming_delay,
5280 perf->i915 = i915;
5282 ret = oa_init_engine_groups(perf);
5289 oa_init_supported_formats(perf);
5322 struct i915_perf *perf = &i915->perf;
5326 if (!perf->i915)
5329 for_each_gt(gt, perf->i915, i)
5330 kfree(gt->perf.group);
5332 idr_for_each(&perf->metrics_idr, destroy_config, perf);
5333 idr_destroy(&perf->metrics_idr);
5335 memset(&perf->ops, 0, sizeof(perf->ops));
5336 perf->i915 = NULL;
5340 * i915_perf_ioctl_version - Version of the i915-perf subsystem