GPU/Scheduling: Implement priority scheduling.

2024-02-10 18:56:37 +01:00 · 2024-02-10 18:56:37 +01:00 · 7a4ea8991f
commit 7a4ea8991f
parent 42f7f6b6fd
4 changed files with 218 additions and 84 deletions
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@ -13,6 +13,7 @@
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/memory.h"
 #include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
 #include "video_core/engines/puller.h"
 #include "video_core/gpu.h"
 #include "video_core/host1x/host1x.h"
@ -33,6 +34,7 @@ nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
      channel_state{system.GPU().AllocateChannel()} {
    channel_syncpoint = syncpoint_manager.AllocateSyncpoint(false);
+    channel_state->syncpoint_id = channel_syncpoint;
    sm_exception_breakpoint_int_report_event =
        events_interface.CreateEvent("GpuChannelSMExceptionBreakpointInt");
    sm_exception_breakpoint_pause_report_event =
@ -157,6 +159,9 @@ NvResult nvhost_gpu::SetErrorNotifier(IoctlSetErrorNotifier& params) {

 NvResult nvhost_gpu::SetChannelPriority(IoctlChannelSetPriority& params) {
+    // Records the requested priority and makes sure the GPU scheduler will honour it,
+    // whether or not the channel has been initialized yet.
    channel_priority = params.priority;
+    if (channel_state->initialized) {
+        // Ask the scheduler to move the channel to the new priority level.
+        // (ChangePriority ignores channels it is not tracking.)
+        system.GPU().Scheduler().ChangePriority(channel_state->bind_id, channel_priority);
+    } else {
+        // Not initialized yet: store the priority on the channel state, which is the value
+        // Scheduler::DeclareChannel() reads when it queues a channel. Without this the
+        // request would be dropped and the channel queued at the default priority (0).
+        // NOTE(review): assumes DeclareChannel() runs as part of channel initialization.
+        channel_state->priority = channel_priority;
+    }
    LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority);
    return NvResult::Success;
 }
@ -314,6 +319,7 @@ NvResult nvhost_gpu::GetWaitbase(IoctlGetWaitbase& params) {
 NvResult nvhost_gpu::ChannelSetTimeout(IoctlChannelSetTimeout& params) { // Logs the requested timeout, stores it on the channel state and reports success.
    LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout);

+    channel_state->timeout = params.timeout; // Kept with the other scheduling info; no reader of this field is visible in this change.
    return NvResult::Success;
 }

@ -321,6 +327,7 @@ NvResult nvhost_gpu::ChannelSetTimeslice(IoctlSetTimeslice& params) {
    LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice);

    channel_timeslice = params.timeslice;
+    channel_state->timeslice = params.timeslice;

    return NvResult::Success;
 }
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@ -45,6 +45,12 @@ struct ChannelState {
    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);

    s32 bind_id = -1;
+    /// Scheduling info
+    u32 syncpoint_id = 0xFFFF;
+    u32 priority = 0;
+    u32 timeslice = 0;
+    u32 timeout = 0;
+
    /// 3D engine
    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
    /// 2D engine
--- a/src/video_core/control/scheduler.cpp
+++ b/src/video_core/control/scheduler.cpp
@ -1,112 +1,243 @@
 // SPDX-FileCopyrightText: 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later

+#include <atomic>
+#include <deque>
+#include <map>
 #include <memory>
+#include <mutex>
+#include <unordered_map>
+#include <utility>

 #include "common/assert.h"
-#include "video_core/control/channel_state.h"
+#include "common/fiber.h"
 #include "video_core/control/scheduler.h"
+#include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"

 namespace Tegra::Control {
-Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
+
+/// Scheduler-side bookkeeping for one channel's GPFIFO.
+/// Every scalar member has a default so that a context never holds an indeterminate value,
+/// however it was constructed; Scheduler::DeclareChannel() still assigns the real values.
+struct GPFifoContext {
+    /// Set to true by Scheduler::DeclareChannel() when the slot is handed to a channel.
+    bool is_active{};
+    /// True after work was handed to the DMA pusher, false once the queue was found empty.
+    bool is_running{};
+    /// Fiber that runs Scheduler::ChannelLoop() for this channel.
+    std::shared_ptr<Common::Fiber> context;
+    /// Command lists queued by Scheduler::Push() and not yet handed to the DMA pusher.
+    std::deque<CommandList> pending_work;
+    /// Held while pending_work and is_running are inspected or modified.
+    std::mutex guard;
+    /// Channel id passed to GPU::BindChannel(); -1 while unassigned, like ChannelState::bind_id.
+    s32 bind_id{-1};
+    /// Channel state this fifo belongs to (priority, DMA pusher, ...).
+    std::shared_ptr<ChannelState> info;
+    /// Set by Scheduler::Yield() to scheduled_count + 1.
+    size_t yield_count{};
+    /// Incremented each time the scheduler switches to this fifo or passes over it after a yield.
+    size_t scheduled_count{};
+};
+
+struct Scheduler::SchedulerImpl { // Private scheduler state; the header only forward-declares this type.
+    // Fifos: storage plus the lookup tables that index into it.
+    std::map<u32, std::list<size_t>, std::greater<u32>> schedule_priority_queue; // priority -> fifo indices; std::greater makes iteration start at the highest priority
+    std::unordered_map<s32, size_t> channel_gpfifo_ids; // channel bind id -> index into gpfifos
+    std::deque<GPFifoContext> gpfifos; // fifo storage, addressed by the indices above
+    std::deque<size_t> free_fifos; // indices of reusable gpfifos slots; nothing in the visible code adds entries
+
+    // Scheduling: which fifo runs and whether it should be preempted.
+    std::mutex scheduling_guard; // taken by every function that touches the queues or must_reschedule
+    std::shared_ptr<Common::Fiber> master_control; // fiber created in Init(); channel fibers yield back to it
+    bool must_reschedule{}; // set by Push() for higher-priority work, cleared by UpdateHighestPriorityChannel()
+    GPFifoContext* current_fifo{}; // fifo chosen by the last scheduling pass, nullptr when none
+};
+
+// Builds the private implementation state and keeps a reference to the owning GPU.
+// Initializers follow the declaration order in the class (impl first, then gpu).
+Scheduler::Scheduler(GPU& gpu_) : impl{std::make_unique<SchedulerImpl>()}, gpu{gpu_} {}

 Scheduler::~Scheduler() = default; // Defaulted here, where SchedulerImpl is a complete type (the header only forward-declares it).

 void Scheduler::Init() { // Creates the master fiber that Resume() runs on and channel fibers yield back to.
-    master_control = Common::Fiber::ThreadToFiber();
+    impl->master_control = Common::Fiber::ThreadToFiber(); // presumably converts the calling thread into a fiber - confirm in common/fiber.h
 }

 void Scheduler::Resume() { // Runs channel fibers, one at a time, until no channel can be scheduled.
-    bool nothing_pending;
-    do {
-        nothing_pending = true;
-        current_fifo = nullptr;
-        {
-            std::unique_lock lk(scheduling_guard);
-            size_t num_iters = gpfifos.size();
-            for (size_t i = 0; i < num_iters; i++) {
-                size_t current_id = (current_fifo_rotation_id + i) % gpfifos.size();
-                auto& fifo = gpfifos[current_id];
-                if (!fifo.is_active) {
+    while (UpdateHighestPriorityChannel()) { // each pass selects impl->current_fifo; false means nothing is schedulable
+        impl->current_fifo->scheduled_count++; // counted before the switch; ScheduleLevel() uses it to favour the least-run channel
+        Common::Fiber::YieldTo(impl->master_control, *impl->current_fifo->context); // switch to the channel fiber; returns when it yields back
+    }
+}
+
+// Selects the fifo to run next and publishes it in impl->current_fifo.
+// Returns true when a fifo was selected; on false, current_fifo is left as nullptr.
+bool Scheduler::UpdateHighestPriorityChannel() {
+    std::scoped_lock lk(impl->scheduling_guard);
+
+    // Start from a clean slate: no channel selected, no reschedule pending.
+    impl->current_fifo = nullptr;
+    impl->must_reschedule = false;
+
+    // The map is ordered with std::greater, so levels are visited from the highest priority
+    // down; the first level that offers a schedulable fifo ends the search.
+    for (auto& [priority, fifo_ids] : impl->schedule_priority_queue) {
+        if (ScheduleLevel(fifo_ids)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+bool Scheduler::ScheduleLevel(std::list<size_t>& queue) { // Scans one priority level; on success stores the pick in impl->current_fifo and returns true.
+    bool found_anything = false;
+    size_t min_schedule_count = std::numeric_limits<size_t>::max(); // NOTE(review): std::numeric_limits is declared in <limits>, which this file does not include directly
+    for (auto id : queue) {
+        auto& fifo = impl->gpfifos[id];
+        std::scoped_lock lk(fifo.guard); // held for the whole iteration while this fifo's state is read and updated
+
+        // With no pending work and nothing running, this channel can't be scheduled.
+        if (fifo.pending_work.empty() && !fifo.is_running) {
            continue;
        }
-                std::scoped_lock lk2(fifo.guard);
-                if (!fifo.pending_work.empty() || fifo.working.load(std::memory_order_acquire)) {
-                    current_fifo = &fifo;
-                    current_fifo_rotation_id = current_id;
-                    nothing_pending = false;
-                    break;
+        // Prioritize channels at current priority which have been run the least (ties are not skipped, so a later entry with an equal count replaces the pick).
+        if (fifo.scheduled_count > min_schedule_count) {
+            continue;
        }
+
+        // Try not to select the same channel we just yielded from: Yield() leaves yield_count above scheduled_count.
+        if (fifo.scheduled_count < fifo.yield_count) {
+            fifo.scheduled_count++; // close the gap so the channel becomes eligible on a later pass
+            continue;
        }
+
+        // Update best selection.
+        min_schedule_count = fifo.scheduled_count;
+        impl->current_fifo = &fifo;
+        found_anything = true;
    }
-        if (current_fifo) {
-            Common::Fiber::YieldTo(master_control, *current_fifo->context);
-            current_fifo = nullptr;
+    return found_anything;
+}
+
+void Scheduler::ChangePriority(s32 channel_id, u32 new_priority) {
+    // Moves the fifo bound to channel_id into the queue for new_priority.
+    // Untracked channels and unchanged priorities leave the queues untouched.
+    std::scoped_lock lk(impl->scheduling_guard);
+    // Ensure we are tracking this channel.
+    auto fifo_it = impl->channel_gpfifo_ids.find(channel_id);
+    if (fifo_it == impl->channel_gpfifo_ids.end()) {
+        return;
    }
-    } while (!nothing_pending);
+
+    // Get the fifo and update its priority.
+    const size_t fifo_id = fifo_it->second;
+    auto& fifo = impl->gpfifos[fifo_id];
+    const auto old_priority = std::exchange(fifo.info->priority, new_priority);
+
+    // Nothing to move when the priority is unchanged. Without this check the id would be
+    // appended to its level and then erased again by the removal below (which drops every
+    // matching id), leaving the channel in no queue at all.
+    if (old_priority == new_priority) {
+        return;
+    }
+
+    // Detach from the old level first (if that level exists), then append to the new one;
+    // operator[] creates the new level on demand.
+    if (auto old_level = impl->schedule_priority_queue.find(old_priority);
+        old_level != impl->schedule_priority_queue.end()) {
+        old_level->second.remove(fifo_id);
+    }
+    impl->schedule_priority_queue[new_priority].push_back(fifo_id);
 }

 void Scheduler::Yield() { // Called from a channel fiber: gives control back to the master fiber and rebinds the channel once resumed.
-    ASSERT(current_fifo != nullptr);
-    Common::Fiber::YieldTo(current_fifo->context, *master_control);
-    gpu.BindChannel(current_fifo->bind_id);
+    ASSERT(impl->current_fifo != nullptr);
+
+    // Set yield_count one above scheduled_count so that ScheduleLevel() passes over this channel before picking it again.
+    impl->current_fifo->yield_count = impl->current_fifo->scheduled_count + 1;
+    Common::Fiber::YieldTo(impl->current_fifo->context, *impl->master_control); // execution continues after this call once the scheduler switches back
+    gpu.BindChannel(impl->current_fifo->bind_id); // another channel may have been bound in the meantime
+}
+
+void Scheduler::CheckStatus() {
+    // Sample the reschedule request under the lock, then act on it without holding it.
+    bool reschedule_requested{};
+    {
+        std::unique_lock lk(impl->scheduling_guard);
+        reschedule_requested = impl->must_reschedule;
+    }
+    if (!reschedule_requested) {
+        // No higher-priority work was pushed; keep running the current channel.
+        return;
+    }
+
+    // Hand control to the scheduler; once this channel is picked again, rebind it.
+    Common::Fiber::YieldTo(impl->current_fifo->context, *impl->master_control);
+    gpu.BindChannel(impl->current_fifo->bind_id);
 }

 void Scheduler::Push(s32 channel, CommandList&& entries) { // Queues entries on the channel's fifo and requests a reschedule if that fifo outranks the running one.
-    std::unique_lock lk(scheduling_guard);
-    auto it = channel_gpfifo_ids.find(channel);
-    ASSERT(it != channel_gpfifo_ids.end());
+    std::scoped_lock lk(impl->scheduling_guard);
+    // Look up the fifo for this channel; the channel must have been declared.
+    auto it = impl->channel_gpfifo_ids.find(channel);
+    ASSERT(it != impl->channel_gpfifo_ids.end());
    auto gpfifo_id = it->second;
-    auto& fifo = gpfifos[gpfifo_id];
+    auto& fifo = impl->gpfifos[gpfifo_id];
+    // Add the new work to the channel.
    {
        std::scoped_lock lk2(fifo.guard);
        fifo.pending_work.emplace_back(std::move(entries));
    }
+
+    // If no fifo is currently running, or the running fifo is the one being
+    // pushed to, no preemption is needed: just return.
+    if (impl->current_fifo == nullptr || impl->current_fifo == &fifo) {
+        return;
+    }
+
+    // If the running fifo has higher or equal priority than the fifo being pushed to, then return.
+    if (impl->current_fifo->info->priority >= fifo.info->priority) {
+        return;
+    }
+    // Higher-priority work arrived: mark a scheduler update as required (picked up by CheckStatus()).
+    impl->must_reschedule = true;
 }

 void Scheduler::ChannelLoop(size_t gpfifo_id, s32 channel_id) { // Fiber body for one channel: drains its queue, then yields to the master fiber; never returns.
-    gpu.BindChannel(channel_id);
-    auto& fifo = gpfifos[gpfifo_id];
-    while (true) {
-        auto* channel_state = channels[channel_id].get();
-        fifo.guard.lock();
-        while (!fifo.pending_work.empty()) {
-            {
-
-                fifo.working.store(true, std::memory_order_release);
+    auto& fifo = impl->gpfifos[gpfifo_id];
+    auto* channel_state = fifo.info.get();
+    const auto SendToPuller = [&] { // moves one queued command list to the DMA pusher; returns false when the queue is empty
+        std::scoped_lock lk(fifo.guard);
+        if (fifo.pending_work.empty()) {
+            // Stop if no work available.
+            fifo.is_running = false;
+            return false;
+        }
+        // Otherwise, send work to puller and mark as running.
        CommandList&& entries = std::move(fifo.pending_work.front());
        channel_state->dma_pusher->Push(std::move(entries));
        fifo.pending_work.pop_front();
-            }
-            fifo.guard.unlock();
+        fifo.is_running = true;
+        // Succeed.
+        return true;
+    };
+    // Inform the GPU about the current channel.
+    gpu.BindChannel(channel_id);
+    while (true) {
+        while (SendToPuller()) {
+            // Execute the submitted work; fifo.guard is no longer held at this point.
            channel_state->dma_pusher->DispatchCalls();
-            fifo.guard.lock();
+            // Let the scheduler take over if higher-priority work was pushed meanwhile.
+            CheckStatus();
        }
-        fifo.working.store(false, std::memory_order_relaxed);
-        fifo.guard.unlock();
-        Common::Fiber::YieldTo(fifo.context, *master_control);
+        // Queue drained: return to the master fiber until this channel is scheduled again.
+        Common::Fiber::YieldTo(fifo.context, *impl->master_control);
+        // Inform the GPU about the current channel.
        gpu.BindChannel(channel_id);
    }
 }

 void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) { // Registers a channel: assigns it a fifo slot, queues it at its priority and creates its fiber.
    s32 channel = new_channel->bind_id;
-    std::unique_lock lk(scheduling_guard);
-    channels.emplace(channel, new_channel);
+    std::unique_lock lk(impl->scheduling_guard);
+
    size_t new_fifo_id;
-    if (!free_fifos.empty()) {
-        new_fifo_id = free_fifos.front();
-        free_fifos.pop_front();
+    if (!impl->free_fifos.empty()) { // reuse a free slot when one is available
+        new_fifo_id = impl->free_fifos.front();
+        impl->free_fifos.pop_front();
    } else {
-        new_fifo_id = gpfifos.size();
-        gpfifos.emplace_back();
+        new_fifo_id = impl->gpfifos.size(); // otherwise append a new slot; its index is the old size
+        impl->gpfifos.emplace_back();
    }
-    auto& new_fifo = gpfifos[new_fifo_id];
-    channel_gpfifo_ids[channel] = new_fifo_id;
+    auto& new_fifo = impl->gpfifos[new_fifo_id];
+    impl->channel_gpfifo_ids[channel] = new_fifo_id;
    new_fifo.is_active = true;
    new_fifo.bind_id = channel;
    new_fifo.pending_work.clear();
+    new_fifo.info = new_channel; // the fifo shares ownership of the channel state
+    new_fifo.scheduled_count = 0;
+    new_fifo.yield_count = 0;
+    new_fifo.is_running = false;
+    impl->schedule_priority_queue.try_emplace(new_channel->priority); // create the level if it is missing
+    impl->schedule_priority_queue[new_channel->priority].push_back(new_fifo_id); // queue at whatever priority the channel state holds right now
    std::function<void()> callback = std::bind(&Scheduler::ChannelLoop, this, new_fifo_id, channel);
    new_fifo.context = std::make_shared<Common::Fiber>(std::move(callback));
 }
--- a/src/video_core/control/scheduler.h
+++ b/src/video_core/control/scheduler.h
@ -3,13 +3,11 @@

 #pragma once

-#include <atomic>
-#include <deque>
+#include <list>
 #include <memory>
-#include <mutex>
-#include <unordered_map>

-#include "common/fiber.h"
+#include "common/common_types.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/dma_pusher.h"

 namespace Tegra {
@ -35,26 +33,18 @@ public:

    void DeclareChannel(std::shared_ptr<ChannelState> new_channel);

+    void ChangePriority(s32 channel_id, u32 new_priority);
+
 private:
    void ChannelLoop(size_t gpfifo_id, s32 channel_id);
+    bool ScheduleLevel(std::list<size_t>& queue);
+    void CheckStatus();
+    bool UpdateHighestPriorityChannel();
+
+    struct SchedulerImpl;
+    std::unique_ptr<SchedulerImpl> impl;

-    std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
-    std::unordered_map<s32, size_t> channel_gpfifo_ids;
-    std::mutex scheduling_guard;
-    std::shared_ptr<Common::Fiber> master_control;
-    struct GPFifoContext {
-        bool is_active;
-        std::shared_ptr<Common::Fiber> context;
-        std::deque<CommandList> pending_work;
-        std::atomic<bool> working{};
-        std::mutex guard;
-        s32 bind_id;
-    };
-    std::deque<GPFifoContext> gpfifos;
-    std::deque<size_t> free_fifos;
    GPU& gpu;
-    size_t current_fifo_rotation_id{};
-    GPFifoContext* current_fifo{};
 };

 } // namespace Control