Skip to content
Snippets Groups Projects
  1. Mar 21, 2025
  2. Feb 25, 2025
  3. Dec 10, 2024
  4. Nov 12, 2024
  5. Nov 11, 2024
  6. Nov 08, 2024
  7. Nov 05, 2024
  8. Oct 15, 2024
    • SRINIVASAN SHANMUGAM's avatar
      drm/amd/amdgpu: Fix double unlock in amdgpu_mes_add_ring · e7457532
      SRINIVASAN SHANMUGAM authored
      
      This patch addresses a double unlock issue in the amdgpu_mes_add_ring
      function. The mutex was being unlocked twice under certain error
      conditions, which could lead to undefined behavior.
      
      The fix ensures that the mutex is unlocked only once before jumping to
      the clean_up_memory label. The unlock operation is moved to just before
      the goto statement within the conditional block that checks the return
      value of amdgpu_ring_init. This prevents the second unlock attempt after
      the clean_up_memory label, which is no longer necessary as the mutex is
      already unlocked by this point in the code flow.
      
      This change resolves the potential double unlock and maintains the
      correct mutex handling throughout the function.
      
      Fixes the issue below:
      Commit d0c423b6 ("drm/amdgpu/mes: use ring for kernel queue
      submission") leads to the following Smatch static checker warning:
      
      	drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1240 amdgpu_mes_add_ring()
      	warn: double unlock '&adev->mes.mutex_hidden' (orig line 1213)
      
      drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
          1143 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
          1144                         int queue_type, int idx,
          1145                         struct amdgpu_mes_ctx_data *ctx_data,
          1146                         struct amdgpu_ring **out)
          1147 {
          1148         struct amdgpu_ring *ring;
          1149         struct amdgpu_mes_gang *gang;
          1150         struct amdgpu_mes_queue_properties qprops = {0};
          1151         int r, queue_id, pasid;
          1152
          1153         /*
          1154          * Avoid taking any other locks under MES lock to avoid circular
          1155          * lock dependencies.
          1156          */
          1157         amdgpu_mes_lock(&adev->mes);
          1158         gang = idr_find(&adev->mes.gang_id_idr, gang_id);
          1159         if (!gang) {
          1160                 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
          1161                 amdgpu_mes_unlock(&adev->mes);
          1162                 return -EINVAL;
          1163         }
          1164         pasid = gang->process->pasid;
          1165
          1166         ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
          1167         if (!ring) {
          1168                 amdgpu_mes_unlock(&adev->mes);
          1169                 return -ENOMEM;
          1170         }
          1171
          1172         ring->ring_obj = NULL;
          1173         ring->use_doorbell = true;
          1174         ring->is_mes_queue = true;
          1175         ring->mes_ctx = ctx_data;
          1176         ring->idx = idx;
          1177         ring->no_scheduler = true;
          1178
          1179         if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
          1180                 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
          1181                                       compute[ring->idx].mec_hpd);
          1182                 ring->eop_gpu_addr =
          1183                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
          1184         }
          1185
          1186         switch (queue_type) {
          1187         case AMDGPU_RING_TYPE_GFX:
          1188                 ring->funcs = adev->gfx.gfx_ring[0].funcs;
          1189                 ring->me = adev->gfx.gfx_ring[0].me;
          1190                 ring->pipe = adev->gfx.gfx_ring[0].pipe;
          1191                 break;
          1192         case AMDGPU_RING_TYPE_COMPUTE:
          1193                 ring->funcs = adev->gfx.compute_ring[0].funcs;
          1194                 ring->me = adev->gfx.compute_ring[0].me;
          1195                 ring->pipe = adev->gfx.compute_ring[0].pipe;
          1196                 break;
          1197         case AMDGPU_RING_TYPE_SDMA:
          1198                 ring->funcs = adev->sdma.instance[0].ring.funcs;
          1199                 break;
          1200         default:
          1201                 BUG();
          1202         }
          1203
          1204         r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
          1205                              AMDGPU_RING_PRIO_DEFAULT, NULL);
          1206         if (r)
          1207                 goto clean_up_memory;
          1208
          1209         amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
          1210
          1211         dma_fence_wait(gang->process->vm->last_update, false);
          1212         dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
          1213         amdgpu_mes_unlock(&adev->mes);
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      
          1214
          1215         r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
          1216         if (r)
          1217                 goto clean_up_ring;
                               ^^^^^^^^^^^^^^^^^^
      
          1218
          1219         ring->hw_queue_id = queue_id;
          1220         ring->doorbell_index = qprops.doorbell_off;
          1221
          1222         if (queue_type == AMDGPU_RING_TYPE_GFX)
          1223                 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
          1224         else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
          1225                 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
          1226                         queue_id);
          1227         else if (queue_type == AMDGPU_RING_TYPE_SDMA)
          1228                 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
          1229                         queue_id);
          1230         else
          1231                 BUG();
          1232
          1233         *out = ring;
          1234         return 0;
          1235
          1236 clean_up_ring:
          1237         amdgpu_ring_fini(ring);
          1238 clean_up_memory:
          1239         kfree(ring);
      --> 1240         amdgpu_mes_unlock(&adev->mes);
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      
          1241         return r;
          1242 }
      
      Fixes: d0c423b6 ("drm/amdgpu/mes: use ring for kernel queue submission")
      Cc: Christian König <christian.koenig@amd.com>
      Cc: Alex Deucher <alexander.deucher@amd.com>
      Cc: Hawking Zhang <Hawking.Zhang@amd.com>
      Suggested-by: Jack Xiao <Jack.Xiao@amd.com>
      Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
      Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
      Reviewed-by: Jack Xiao <Jack.Xiao@amd.com>
      Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
      (cherry picked from commit bfaf1883)
      e7457532
    • SRINIVASAN SHANMUGAM's avatar
      drm/amd/amdgpu: Fix double unlock in amdgpu_mes_add_ring · bfaf1883
      SRINIVASAN SHANMUGAM authored
      
      This patch addresses a double unlock issue in the amdgpu_mes_add_ring
      function. The mutex was being unlocked twice under certain error
      conditions, which could lead to undefined behavior.
      
      The fix ensures that the mutex is unlocked only once before jumping to
      the clean_up_memory label. The unlock operation is moved to just before
      the goto statement within the conditional block that checks the return
      value of amdgpu_ring_init. This prevents the second unlock attempt after
      the clean_up_memory label, which is no longer necessary as the mutex is
      already unlocked by this point in the code flow.
      
      This change resolves the potential double unlock and maintains the
      correct mutex handling throughout the function.
      
      Fixes the issue below:
      Commit d0c423b6 ("drm/amdgpu/mes: use ring for kernel queue
      submission") leads to the following Smatch static checker warning:
      
      	drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c:1240 amdgpu_mes_add_ring()
      	warn: double unlock '&adev->mes.mutex_hidden' (orig line 1213)
      
      drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
          1143 int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
          1144                         int queue_type, int idx,
          1145                         struct amdgpu_mes_ctx_data *ctx_data,
          1146                         struct amdgpu_ring **out)
          1147 {
          1148         struct amdgpu_ring *ring;
          1149         struct amdgpu_mes_gang *gang;
          1150         struct amdgpu_mes_queue_properties qprops = {0};
          1151         int r, queue_id, pasid;
          1152
          1153         /*
          1154          * Avoid taking any other locks under MES lock to avoid circular
          1155          * lock dependencies.
          1156          */
          1157         amdgpu_mes_lock(&adev->mes);
          1158         gang = idr_find(&adev->mes.gang_id_idr, gang_id);
          1159         if (!gang) {
          1160                 DRM_ERROR("gang id %d doesn't exist\n", gang_id);
          1161                 amdgpu_mes_unlock(&adev->mes);
          1162                 return -EINVAL;
          1163         }
          1164         pasid = gang->process->pasid;
          1165
          1166         ring = kzalloc(sizeof(struct amdgpu_ring), GFP_KERNEL);
          1167         if (!ring) {
          1168                 amdgpu_mes_unlock(&adev->mes);
          1169                 return -ENOMEM;
          1170         }
          1171
          1172         ring->ring_obj = NULL;
          1173         ring->use_doorbell = true;
          1174         ring->is_mes_queue = true;
          1175         ring->mes_ctx = ctx_data;
          1176         ring->idx = idx;
          1177         ring->no_scheduler = true;
          1178
          1179         if (queue_type == AMDGPU_RING_TYPE_COMPUTE) {
          1180                 int offset = offsetof(struct amdgpu_mes_ctx_meta_data,
          1181                                       compute[ring->idx].mec_hpd);
          1182                 ring->eop_gpu_addr =
          1183                         amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
          1184         }
          1185
          1186         switch (queue_type) {
          1187         case AMDGPU_RING_TYPE_GFX:
          1188                 ring->funcs = adev->gfx.gfx_ring[0].funcs;
          1189                 ring->me = adev->gfx.gfx_ring[0].me;
          1190                 ring->pipe = adev->gfx.gfx_ring[0].pipe;
          1191                 break;
          1192         case AMDGPU_RING_TYPE_COMPUTE:
          1193                 ring->funcs = adev->gfx.compute_ring[0].funcs;
          1194                 ring->me = adev->gfx.compute_ring[0].me;
          1195                 ring->pipe = adev->gfx.compute_ring[0].pipe;
          1196                 break;
          1197         case AMDGPU_RING_TYPE_SDMA:
          1198                 ring->funcs = adev->sdma.instance[0].ring.funcs;
          1199                 break;
          1200         default:
          1201                 BUG();
          1202         }
          1203
          1204         r = amdgpu_ring_init(adev, ring, 1024, NULL, 0,
          1205                              AMDGPU_RING_PRIO_DEFAULT, NULL);
          1206         if (r)
          1207                 goto clean_up_memory;
          1208
          1209         amdgpu_mes_ring_to_queue_props(adev, ring, &qprops);
          1210
          1211         dma_fence_wait(gang->process->vm->last_update, false);
          1212         dma_fence_wait(ctx_data->meta_data_va->last_pt_update, false);
          1213         amdgpu_mes_unlock(&adev->mes);
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      
          1214
          1215         r = amdgpu_mes_add_hw_queue(adev, gang_id, &qprops, &queue_id);
          1216         if (r)
          1217                 goto clean_up_ring;
                               ^^^^^^^^^^^^^^^^^^
      
          1218
          1219         ring->hw_queue_id = queue_id;
          1220         ring->doorbell_index = qprops.doorbell_off;
          1221
          1222         if (queue_type == AMDGPU_RING_TYPE_GFX)
          1223                 sprintf(ring->name, "gfx_%d.%d.%d", pasid, gang_id, queue_id);
          1224         else if (queue_type == AMDGPU_RING_TYPE_COMPUTE)
          1225                 sprintf(ring->name, "compute_%d.%d.%d", pasid, gang_id,
          1226                         queue_id);
          1227         else if (queue_type == AMDGPU_RING_TYPE_SDMA)
          1228                 sprintf(ring->name, "sdma_%d.%d.%d", pasid, gang_id,
          1229                         queue_id);
          1230         else
          1231                 BUG();
          1232
          1233         *out = ring;
          1234         return 0;
          1235
          1236 clean_up_ring:
          1237         amdgpu_ring_fini(ring);
          1238 clean_up_memory:
          1239         kfree(ring);
      --> 1240         amdgpu_mes_unlock(&adev->mes);
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      
          1241         return r;
          1242 }
      
      Fixes: d0c423b6 ("drm/amdgpu/mes: use ring for kernel queue submission")
      Cc: Christian König <christian.koenig@amd.com>
      Cc: Alex Deucher <alexander.deucher@amd.com>
      Cc: Hawking Zhang <Hawking.Zhang@amd.com>
      Suggested-by: Jack Xiao <Jack.Xiao@amd.com>
      Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
      Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
      Reviewed-by: Jack Xiao <Jack.Xiao@amd.com>
      Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
      bfaf1883
  9. Sep 26, 2024
  10. Sep 06, 2024
  11. Sep 02, 2024
  12. Aug 21, 2024
  13. Aug 16, 2024
  14. Aug 13, 2024
  15. Jul 27, 2024
  16. Jun 19, 2024
    • Alex Deucher's avatar
      drm/amdgpu: remove amdgpu_mes_fence_wait_polling() · 19797687
      Alex Deucher authored
      
      No longer used so remove it.
      
      Reviewed-by: Mukul Joshi <mukul.joshi@amd.com>
      Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
      19797687
    • Mukul Joshi's avatar
      Revert "drm/amdgpu: Add missing locking for MES API calls" · 4d14a740
      Mukul Joshi authored
      
      This reverts commit 36127028.
      
      This is causing a BUG message during suspend.
      
      [   61.603542] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:283
      [   61.603550] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2028, name: kworker/u64:14
      [   61.603553] preempt_count: 1, expected: 0
      [   61.603555] RCU nest depth: 0, expected: 0
      [   61.603557] Preemption disabled at:
      [   61.603559] [<ffffffffc08a3261>] amdgpu_gfx_disable_kgq+0x61/0x160 [amdgpu]
      [   61.603789] CPU: 9 PID: 2028 Comm: kworker/u64:14 Tainted: G        W          6.8.0+ #7
      [   61.603795] Workqueue: events_unbound async_run_entry_fn
      [   61.603801] Call Trace:
      [   61.603803]  <TASK>
      [   61.603806]  dump_stack_lvl+0x37/0x50
      [   61.603811]  ? amdgpu_gfx_disable_kgq+0x61/0x160 [amdgpu]
      [   61.604007]  dump_stack+0x10/0x20
      [   61.604010]  __might_resched+0x16f/0x1d0
      [   61.604016]  __might_sleep+0x43/0x70
      [   61.604020]  mutex_lock+0x1f/0x60
      [   61.604024]  amdgpu_mes_unmap_legacy_queue+0x6d/0x100 [amdgpu]
      [   61.604226]  gfx11_kiq_unmap_queues+0x3dc/0x430 [amdgpu]
      [   61.604422]  ? srso_alias_return_thunk+0x5/0xfbef5
      [   61.604429]  amdgpu_gfx_disable_kgq+0x122/0x160 [amdgpu]
      [   61.604621]  gfx_v11_0_hw_fini+0xda/0x100 [amdgpu]
      [   61.604814]  gfx_v11_0_suspend+0xe/0x20 [amdgpu]
      [   61.605008]  amdgpu_device_ip_suspend_phase2+0x135/0x1d0 [amdgpu]
      [   61.605175]  amdgpu_device_suspend+0xec/0x180 [amdgpu]
      
      Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
      Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
      Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
      4d14a740
  17. Jun 14, 2024
  18. May 02, 2024
  19. Apr 26, 2024
  20. Apr 24, 2024
Loading