@@ -156,6 +156,11 @@ struct amdgpu_init_level amdgpu_init_default = {
156156 .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL ,
157157};
158158
159+ struct amdgpu_init_level amdgpu_init_recovery = { /* init level descriptor that amdgpu_set_init_level() selects for AMDGPU_INIT_LEVEL_RESET_RECOVERY */
160+ .level = AMDGPU_INIT_LEVEL_RESET_RECOVERY , /* tag identifying this descriptor as the reset-recovery level */
161+ .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL , /* same mask value as amdgpu_init_default: no IP block is masked out */
162+ };
163+
159164/*
160165 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
161166 * is used for cases like reset on initialization where the entire hive needs to
@@ -182,6 +187,9 @@ void amdgpu_set_init_level(struct amdgpu_device *adev,
182187 case AMDGPU_INIT_LEVEL_MINIMAL_XGMI :
183188 adev -> init_lvl = & amdgpu_init_minimal_xgmi ;
184189 break ;
190+ case AMDGPU_INIT_LEVEL_RESET_RECOVERY :
191+ adev -> init_lvl = & amdgpu_init_recovery ;
192+ break ;
185193 case AMDGPU_INIT_LEVEL_DEFAULT :
186194 fallthrough ;
187195 default :
@@ -3250,7 +3258,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
32503258 return r ;
32513259 }
32523260
3253- if (!amdgpu_in_reset (adev ))
3261+ if (!amdgpu_reset_in_recovery (adev ))
32543262 amdgpu_ras_set_error_query_ready (adev , true);
32553263
32563264 amdgpu_device_set_cg_state (adev , AMD_CG_STATE_GATE );
@@ -4669,8 +4677,8 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
46694677 int idx ;
46704678 bool px ;
46714679
4672- amdgpu_fence_driver_sw_fini (adev );
46734680 amdgpu_device_ip_fini (adev );
4681+ amdgpu_fence_driver_sw_fini (adev );
46744682 amdgpu_ucode_release (& adev -> firmware .gpu_info_fw );
46754683 adev -> accel_working = false;
46764684 dma_fence_put (rcu_dereference_protected (adev -> gang_submit , true));
@@ -5419,7 +5427,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
54195427 struct list_head * device_list_handle ;
54205428 bool full_reset , vram_lost = false;
54215429 struct amdgpu_device * tmp_adev ;
5422- int r ;
5430+ int r , init_level ;
54235431
54245432 device_list_handle = reset_context -> reset_device_list ;
54255433
@@ -5428,10 +5436,18 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
54285436
54295437 full_reset = test_bit (AMDGPU_NEED_FULL_RESET , & reset_context -> flags );
54305438
5439+ /*
5440+ * For a reset on init, use the default init level; otherwise use the
5441+ * reset-recovery level until IP re-initialization has completed.
5442+ */
5443+ if (reset_context -> method == AMD_RESET_METHOD_ON_INIT )
5444+ init_level = AMDGPU_INIT_LEVEL_DEFAULT ;
5445+ else
5446+ init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY ;
5447+
54315448 r = 0 ;
54325449 list_for_each_entry (tmp_adev , device_list_handle , reset_list ) {
5433- /* After reset, it's default init level */
5434- amdgpu_set_init_level (tmp_adev , AMDGPU_INIT_LEVEL_DEFAULT );
5450+ amdgpu_set_init_level (tmp_adev , init_level );
54355451 if (full_reset ) {
54365452 /* post card */
54375453 amdgpu_ras_set_fed (tmp_adev , false);
@@ -5518,6 +5534,9 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
55185534
55195535out :
55205536 if (!r ) {
5537+ /* IP init is complete now, set level as default */
5538+ amdgpu_set_init_level (tmp_adev ,
5539+ AMDGPU_INIT_LEVEL_DEFAULT );
55215540 amdgpu_irq_gpu_reset_resume_helper (tmp_adev );
55225541 r = amdgpu_ib_ring_tests (tmp_adev );
55235542 if (r ) {
0 commit comments