mirror of
https://github.com/torvalds/linux.git
synced 2026-01-25 15:03:52 +08:00
accel/habanalabs/gaudi2: assume hard-reset by FW upon MC SEI severe error
FW initiates a hard reset upon an MC SEI severe error. Align the driver to expect this reset and avoid accessing the device until the reset is done.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Ofir Bitton <obitton@habana.ai>
Signed-off-by: Ofir Bitton <obitton@habana.ai>
This commit is contained in:
@@ -10004,6 +10004,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
|
||||
if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
|
||||
reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
|
||||
reset_required = true;
|
||||
is_critical = eq_entry->sei_data.hdr.is_critical;
|
||||
}
|
||||
error_count++;
|
||||
break;
|
||||
@@ -10235,8 +10236,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
|
||||
gaudi2_print_event(hdev, event_type, true,
|
||||
"No error cause for H/W event %u", event_type);
|
||||
|
||||
if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) ||
|
||||
reset_required) {
|
||||
if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || reset_required) {
|
||||
if (reset_required ||
|
||||
(gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
|
||||
reset_flags |= HL_DRV_RESET_HARD;
|
||||
|
||||
Reference in New Issue
Block a user