Lines Matching defs:ras

47 static const char *RAS_FS_NAME = "ras";
85 /* ras block link */
185 dev_warn(adev->dev, " echo 1 > /sys/kernel/debug/dri/0/ras/ras_eeprom_reset\n");
415 * echo "disable <block>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
416 * echo "enable <block> <error>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
417 * echo "inject <block> <error> <sub-block> <address> <value> <mask>" > /sys/kernel/debug/dri/<N>/ras/ras_ctrl
440 * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl
441 * echo inject umc ce 0 0 0 3 > /sys/kernel/debug/dri/0/ras/ras_ctrl
442 * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl
446 * To check disable/enable, see "ras" features at,
447 * /sys/class/drm/card[0/1/2...]/device/ras/features
450 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx|sdma|umc|...]_err_count
454 * Check the "ras" mask at /sys/module/amdgpu/parameters/ras_mask
541 * echo 1 > ../ras/ras_eeprom_reset
585 * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
736 /* If hardware does not support ras, then do not create obj.
737 * But if hardware supports ras, we can create the obj.
740 * IP checks con->support to see if it needs to disable ras.
776 /* Do not enable ras feature if it is not allowed */
782 /* Only enable gfx ras feature from host side */
804 dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n",
833 /* There is no harm to issue a ras TA cmd regardless of
834 * the current ras state.
840 /* With old ras TA, we might fail to enable ras.
852 /* setup the object then issue a ras TA disable cmd.*/
857 /* gfx block ras disable cmd must be sent to ras-ta */
863 /* clean gfx block ras features flag */
914 * bypass psp. vbios enable ras for us.
934 * bypass psp. vbios enable ras for us.
972 dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
991 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
998 ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
1000 if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
1001 adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
1002 adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);
1007 if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
1008 adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
1009 adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
1011 if (adev->umc.ras &&
1012 adev->umc.ras->ecc_info_query_ras_error_count)
1013 adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);
1015 if (adev->umc.ras &&
1016 adev->umc.ras->ecc_info_query_ras_error_address)
1017 adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
1182 dev_err(adev->dev, "ras inject %s failed %d\n",
1236 * all the ip blocks that support query ras error counters/status
1263 /* query all the ip blocks that support ras query interface */
1313 * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
1497 * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot
1503 * echo true > .../ras/auto_reboot
1543 * of RAS IPs during ras recovery.
1598 /* ras fs */
1661 /* ras fs end */
1676 if (adev->nbio.ras &&
1677 adev->nbio.ras->handle_ras_controller_intr_no_bifring)
1678 adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
1680 if (adev->nbio.ras &&
1681 adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
1682 adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
1858 /* in case we register the IH before enabling ras feature */
1922 * PCIE_BIF IP has one different isr by ras controller
1923 * interrupt, the specific ras counter query will be
2048 struct amdgpu_ras *ras =
2051 struct amdgpu_device *adev = ras->adev;
2054 if (!ras->disable_ras_err_cnt_harvest) {
2075 if (amdgpu_device_should_recover_gpu(ras->adev)) {
2083 if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
2088 if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE2_RESET) {
2089 ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE2_RESET;
2096 if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) {
2097 ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
2104 amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
2106 atomic_set(&ras->in_recovery, 0);
2226 &adev->psp.ras_context.ras->eeprom_control;
2288 * Justification of value bad_page_cnt_threshold in ras structure
2394 dev_warn(adev->dev, "Failed to initialize ras recovery! (%d)\n", ret);
2461 * force enable gfx ras, ignore vbios gfx ras flag
2479 * check hardware's ras ability which will be saved in hw_supported.
2480 * if hardware does not support ras, we can skip some ras initialization and
2481 * forbid some ras operations from IP.
2482 * if software itself, say boot parameter, limit the ras ability. We still
2484 * we have to initialize ras as normal. but need check if operation is
2548 * Disable ras feature for aqua vanjaram
2600 adev->umc.ras &&
2601 adev->umc.ras->query_ras_poison_mode) {
2605 adev->umc.ras->query_ras_poison_mode(adev);
2644 /* set gfx block ras context feature for VEGA20 Gaming
2645 * send ras disable cmd to ras ta during ras late init.
2663 /* initialize nbio ras function ahead of any other
2664 * ras functions so hardware fatal error interrupt
2671 adev->nbio.ras = &nbio_v7_4_ras;
2675 /* unlike other generation of nbio ras,
2679 * enable nbio ras in such case. Instead,
2681 adev->nbio.ras = &nbio_v4_3_ras;
2685 adev->nbio.ras = &nbio_v7_9_ras;
2688 /* nbio ras is not available */
2692 /* nbio ras block needs to be enabled ahead of other ras blocks
2698 if (adev->nbio.ras &&
2699 adev->nbio.ras->init_ras_controller_interrupt) {
2700 r = adev->nbio.ras->init_ras_controller_interrupt(adev);
2705 if (adev->nbio.ras &&
2706 adev->nbio.ras->init_ras_err_event_athub_interrupt) {
2707 r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev);
2719 dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
2787 /* in resume phase, if fail to enable ras,
2788 * clean up all ras fs nodes, and disable ras */
2797 /* in resume phase, no need to create ras fs node */
2848 /* helper function to remove ras fs node and interrupt handler */
2878 /* clean ras context for VEGA20 Gaming after send ras disable cmd */
2886 * tricky thing that IP's actual ras error type should be
2892 /* We enable ras on all hw_supported block, but as boot
2914 /* Make sure all ras objects are disabled. */
2925 /* Guest side doesn't need init ras feature */
2931 dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
2959 /* Need disable ras on all IPs here before ip [hw/sw]fini */
2985 /* Clear ras blocks from ras_list and free ras block list node */
3009 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3014 ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
3151 return adev->psp.ras_context.ras;
3159 adev->psp.ras_context.ras = ras_con;
3163 /* check if ras is supported on block, say, sdma, gfx */
3168 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3173 ret = ras && (adev->ras_enabled & (1 << block));
3176 * not enabled, even if the ras block is not supported on
3178 * ras block has ras configuration, it can be considered
3179 * that the ras block supports ras function.
3195 struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
3197 if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)
3198 amdgpu_reset_domain_schedule(ras->adev->reset_domain, &ras->recovery_work);
3203 /* Register each ip ras block into amdgpu ras */