GPU: Implement channel scheduling.

This commit is contained in:
Fernando Sahmkow 2024-01-05 01:51:42 +01:00
parent fe6934593f
commit 42f7f6b6fd
7 changed files with 149 additions and 22 deletions

View File

@ -13,20 +13,102 @@ Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
Scheduler::~Scheduler() = default; Scheduler::~Scheduler() = default;
/// Stores the fiber returned by Common::Fiber::ThreadToFiber() as the scheduler's master
/// control fiber. Resume() switches from this fiber into channel fibers, and channel fibers
/// yield back to it.
void Scheduler::Init() {
    auto thread_fiber = Common::Fiber::ThreadToFiber();
    master_control = std::move(thread_fiber);
}
/// Runs channel fibers until no active GPFIFO has queued or in-flight work.
///
/// Each pass scans the GPFIFOs under scheduling_guard, starting at current_fifo_rotation_id,
/// and selects the first active one that either has pending command lists or is flagged as
/// working. The master fiber then switches into that channel's fiber; control returns here when
/// the channel fiber yields back to master_control (Scheduler::Yield or the end of a
/// ChannelLoop pass). The function returns, with current_fifo reset to nullptr, once a full
/// scan finds nothing to run.
void Scheduler::Resume() {
    while (true) {
        current_fifo = nullptr;
        {
            std::scoped_lock lk(scheduling_guard);
            const size_t num_fifos = gpfifos.size();
            for (size_t i = 0; i < num_fifos; i++) {
                const size_t candidate_id = (current_fifo_rotation_id + i) % num_fifos;
                auto& fifo = gpfifos[candidate_id];
                if (!fifo.is_active) {
                    continue;
                }
                std::scoped_lock lk2(fifo.guard);
                if (fifo.pending_work.empty() && !fifo.working.load(std::memory_order_acquire)) {
                    continue;
                }
                current_fifo = &fifo;
                // Begin the next scan *after* the channel picked now. Restarting at the same
                // index would re-select a channel that yielded while still working (e.g. one
                // waiting on a fence) on every pass and starve every other channel.
                current_fifo_rotation_id = (candidate_id + 1) % num_fifos;
                break;
            }
        }
        if (current_fifo == nullptr) {
            // A complete scan found no runnable channel.
            return;
        }
        // The scheduling lock is released before switching fibers so that Push() and
        // DeclareChannel() on other threads are not blocked while the channel runs.
        Common::Fiber::YieldTo(master_control, *current_fifo->context);
        current_fifo = nullptr;
    }
}
void Scheduler::Yield() {
ASSERT(current_fifo != nullptr);
Common::Fiber::YieldTo(current_fifo->context, *master_control);
gpu.BindChannel(current_fifo->bind_id);
}
void Scheduler::Push(s32 channel, CommandList&& entries) { void Scheduler::Push(s32 channel, CommandList&& entries) {
std::unique_lock lk(scheduling_guard); std::unique_lock lk(scheduling_guard);
auto it = channels.find(channel); auto it = channel_gpfifo_ids.find(channel);
ASSERT(it != channels.end()); ASSERT(it != channel_gpfifo_ids.end());
auto channel_state = it->second; auto gpfifo_id = it->second;
gpu.BindChannel(channel_state->bind_id); auto& fifo = gpfifos[gpfifo_id];
channel_state->dma_pusher->Push(std::move(entries)); {
channel_state->dma_pusher->DispatchCalls(); std::scoped_lock lk2(fifo.guard);
fifo.pending_work.emplace_back(std::move(entries));
}
}
/// Body of a channel fiber (installed by DeclareChannel). Never returns.
///
/// Each pass drains the GPFIFO's pending command lists into the channel's DMA pusher and
/// dispatches them, then yields to the master control fiber until Resume() switches back in.
///
/// @param gpfifo_id  Index into gpfifos of the slot this fiber services.
/// @param channel_id Channel id used to look up the ChannelState and to bind the channel.
void Scheduler::ChannelLoop(size_t gpfifo_id, s32 channel_id) {
    gpu.BindChannel(channel_id);
    auto& fifo = gpfifos[gpfifo_id];
    while (true) {
        auto* channel_state = channels[channel_id].get();
        {
            // RAII lock: if Push() below throws, fifo.guard is released instead of staying
            // locked as it would with bare lock()/unlock() calls.
            std::unique_lock lk(fifo.guard);
            while (!fifo.pending_work.empty()) {
                // Flag the fifo as busy so Resume() keeps scheduling it even after the queue
                // has been emptied but dispatch is still in progress.
                fifo.working.store(true, std::memory_order_release);
                channel_state->dma_pusher->Push(std::move(fifo.pending_work.front()));
                fifo.pending_work.pop_front();

                // Dispatch without holding the fifo lock: it may run for a long time and may
                // yield to the master fiber, during which Push() must be able to enqueue.
                lk.unlock();
                channel_state->dma_pusher->DispatchCalls();
                lk.lock();
            }
            fifo.working.store(false, std::memory_order_relaxed);
        }
        // The lock scope is closed before switching fibers.
        Common::Fiber::YieldTo(fifo.context, *master_control);
        gpu.BindChannel(channel_id);
    }
}
void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) { void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
s32 channel = new_channel->bind_id; s32 channel = new_channel->bind_id;
std::unique_lock lk(scheduling_guard); std::unique_lock lk(scheduling_guard);
channels.emplace(channel, new_channel); channels.emplace(channel, new_channel);
size_t new_fifo_id;
if (!free_fifos.empty()) {
new_fifo_id = free_fifos.front();
free_fifos.pop_front();
} else {
new_fifo_id = gpfifos.size();
gpfifos.emplace_back();
}
auto& new_fifo = gpfifos[new_fifo_id];
channel_gpfifo_ids[channel] = new_fifo_id;
new_fifo.is_active = true;
new_fifo.bind_id = channel;
new_fifo.pending_work.clear();
std::function<void()> callback = std::bind(&Scheduler::ChannelLoop, this, new_fifo_id, channel);
new_fifo.context = std::make_shared<Common::Fiber>(std::move(callback));
} }
} // namespace Tegra::Control } // namespace Tegra::Control

View File

@ -3,10 +3,13 @@
#pragma once #pragma once
#include <atomic>
#include <deque>
#include <memory> #include <memory>
#include <mutex> #include <mutex>
#include <unordered_map> #include <unordered_map>
#include "common/fiber.h"
#include "video_core/dma_pusher.h" #include "video_core/dma_pusher.h"
namespace Tegra { namespace Tegra {
@ -22,14 +25,36 @@ public:
explicit Scheduler(GPU& gpu_); explicit Scheduler(GPU& gpu_);
~Scheduler(); ~Scheduler();
void Init();
void Resume();
void Yield();
void Push(s32 channel, CommandList&& entries); void Push(s32 channel, CommandList&& entries);
void DeclareChannel(std::shared_ptr<ChannelState> new_channel); void DeclareChannel(std::shared_ptr<ChannelState> new_channel);
private: private:
void ChannelLoop(size_t gpfifo_id, s32 channel_id);
std::unordered_map<s32, std::shared_ptr<ChannelState>> channels; std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
std::unordered_map<s32, size_t> channel_gpfifo_ids;
std::mutex scheduling_guard; std::mutex scheduling_guard;
std::shared_ptr<Common::Fiber> master_control;
/// Scheduling state of one GPFIFO slot; each declared channel is assigned one slot.
struct GPFifoContext {
    /// Set by DeclareChannel() when the slot is assigned; Resume() skips slots where it is
    /// false.
    bool is_active{};
    /// Fiber that runs ChannelLoop() for the channel assigned to this slot.
    std::shared_ptr<Common::Fiber> context;
    /// Command lists queued by Push() and not yet handed to the channel's DMA pusher.
    std::deque<CommandList> pending_work;
    /// True from the moment ChannelLoop() takes a command list until it has drained the
    /// queue; lets Resume() keep scheduling a channel that yielded in the middle of dispatch.
    std::atomic<bool> working{};
    /// Protects pending_work; also held while working is updated or inspected.
    std::mutex guard;
    /// Channel id that is passed to GPU::BindChannel() when this slot's fiber resumes.
    s32 bind_id{};
};
std::deque<GPFifoContext> gpfifos;
std::deque<size_t> free_fifos;
GPU& gpu; GPU& gpu;
size_t current_fifo_rotation_id{};
GPFifoContext* current_fifo{};
}; };
} // namespace Control } // namespace Control

View File

@ -6,6 +6,7 @@
#include "common/settings.h" #include "common/settings.h"
#include "core/core.h" #include "core/core.h"
#include "video_core/control/channel_state.h" #include "video_core/control/channel_state.h"
#include "video_core/control/scheduler.h"
#include "video_core/dma_pusher.h" #include "video_core/dma_pusher.h"
#include "video_core/engines/fermi_2d.h" #include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_compute.h"
@ -14,6 +15,8 @@
#include "video_core/engines/maxwell_dma.h" #include "video_core/engines/maxwell_dma.h"
#include "video_core/engines/puller.h" #include "video_core/engines/puller.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/host1x/host1x.h"
#include "video_core/host1x/syncpoint_manager.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
@ -60,11 +63,14 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) {
} }
void Puller::ProcessFenceActionMethod() { void Puller::ProcessFenceActionMethod() {
auto& syncpoint_manager = gpu.Host1x().GetSyncpointManager();
switch (regs.fence_action.op) { switch (regs.fence_action.op) {
case Puller::FenceOperation::Acquire: case Puller::FenceOperation::Acquire:
// UNIMPLEMENTED_MSG("Channel Scheduling pending."); while (regs.fence_value >
// WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); syncpoint_manager.GetGuestSyncpointValue(regs.fence_action.syncpoint_id)) {
rasterizer->ReleaseFences(); rasterizer->ReleaseFences();
gpu.Scheduler().Yield();
}
break; break;
case Puller::FenceOperation::Increment: case Puller::FenceOperation::Increment:
rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);

View File

@ -401,6 +401,14 @@ std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
return impl->AllocateChannel(); return impl->AllocateChannel();
} }
/// Returns a mutable reference to the channel scheduler held by the GPU implementation object.
Tegra::Control::Scheduler& GPU::Scheduler() {
    auto& channel_scheduler = *impl->scheduler;
    return channel_scheduler;
}
/// Returns a read-only reference to the channel scheduler held by the GPU implementation object.
const Tegra::Control::Scheduler& GPU::Scheduler() const {
    const auto& channel_scheduler = *impl->scheduler;
    return channel_scheduler;
}
void GPU::InitChannel(Control::ChannelState& to_init) { void GPU::InitChannel(Control::ChannelState& to_init) {
impl->InitChannel(to_init); impl->InitChannel(to_init);
} }

View File

@ -124,7 +124,8 @@ class KeplerCompute;
namespace Control { namespace Control {
struct ChannelState; struct ChannelState;
} class Scheduler;
} // namespace Control
namespace Host1x { namespace Host1x {
class Host1x; class Host1x;
@ -204,6 +205,12 @@ public:
/// Returns a const reference to the shader notifier. /// Returns a const reference to the shader notifier.
[[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const; [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const;
/// Returns GPU Channel Scheduler.
[[nodiscard]] Tegra::Control::Scheduler& Scheduler();
/// Returns GPU Channel Scheduler.
[[nodiscard]] const Tegra::Control::Scheduler& Scheduler() const;
[[nodiscard]] u64 GetTicks() const; [[nodiscard]] u64 GetTicks() const;
[[nodiscard]] bool IsAsync() const; [[nodiscard]] bool IsAsync() const;

View File

@ -33,13 +33,15 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
CommandDataContainer next; CommandDataContainer next;
scheduler.Init();
while (!stop_token.stop_requested()) { while (!stop_token.stop_requested()) {
state.queue.PopWait(next, stop_token); state.queue.PopWait(next, stop_token);
if (stop_token.stop_requested()) { if (stop_token.stop_requested()) {
break; break;
} }
if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) { if (std::holds_alternative<SubmitListCommand>(next.data)) {
scheduler.Push(submit_list->channel, std::move(submit_list->entries)); scheduler.Resume();
} else if (std::holds_alternative<GPUTickCommand>(next.data)) { } else if (std::holds_alternative<GPUTickCommand>(next.data)) {
system.GPU().TickWork(); system.GPU().TickWork();
} else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
@ -66,14 +68,16 @@ ThreadManager::~ThreadManager() = default;
void ThreadManager::StartThread(VideoCore::RendererBase& renderer, void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context, Core::Frontend::GraphicsContext& context,
Tegra::Control::Scheduler& scheduler) { Tegra::Control::Scheduler& scheduler_) {
rasterizer = renderer.ReadRasterizer(); rasterizer = renderer.ReadRasterizer();
scheduler = &scheduler_;
thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
std::ref(scheduler), std::ref(state)); std::ref(scheduler_), std::ref(state));
} }
void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) { void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
PushCommand(SubmitListCommand(channel, std::move(entries))); scheduler->Push(channel, std::move(entries));
PushCommand(SubmitListCommand());
} }
void ThreadManager::FlushRegion(DAddr addr, u64 size) { void ThreadManager::FlushRegion(DAddr addr, u64 size) {

View File

@ -36,13 +36,7 @@ class RendererBase;
namespace VideoCommon::GPUThread { namespace VideoCommon::GPUThread {
/// Command to signal to the GPU thread that a command list is ready for processing /// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand final { struct SubmitListCommand final {};
explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
: channel{channel_}, entries{std::move(entries_)} {}
s32 channel;
Tegra::CommandList entries;
};
/// Command to signal to the GPU thread to flush a region /// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final { struct FlushRegionCommand final {
@ -124,6 +118,7 @@ public:
private: private:
/// Pushes a command to be executed by the GPU thread /// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data, bool block = false); u64 PushCommand(CommandData&& command_data, bool block = false);
Tegra::Control::Scheduler* scheduler;
Core::System& system; Core::System& system;
const bool is_async; const bool is_async;