Merge pull request #3490 from ReinUsesLisp/transform-feedbacks
video_core: Initial implementation of transform feedbacks
This commit is contained in:
commit
35145bd529
@ -102,6 +102,8 @@ set(HASH_FILES
|
|||||||
"${VIDEO_CORE}/shader/shader_ir.cpp"
|
"${VIDEO_CORE}/shader/shader_ir.cpp"
|
||||||
"${VIDEO_CORE}/shader/shader_ir.h"
|
"${VIDEO_CORE}/shader/shader_ir.h"
|
||||||
"${VIDEO_CORE}/shader/track.cpp"
|
"${VIDEO_CORE}/shader/track.cpp"
|
||||||
|
"${VIDEO_CORE}/shader/transform_feedback.cpp"
|
||||||
|
"${VIDEO_CORE}/shader/transform_feedback.h"
|
||||||
)
|
)
|
||||||
set(COMBINED "")
|
set(COMBINED "")
|
||||||
foreach (F IN LISTS HASH_FILES)
|
foreach (F IN LISTS HASH_FILES)
|
||||||
|
@ -83,6 +83,8 @@ add_custom_command(OUTPUT scm_rev.cpp
|
|||||||
"${VIDEO_CORE}/shader/shader_ir.cpp"
|
"${VIDEO_CORE}/shader/shader_ir.cpp"
|
||||||
"${VIDEO_CORE}/shader/shader_ir.h"
|
"${VIDEO_CORE}/shader/shader_ir.h"
|
||||||
"${VIDEO_CORE}/shader/track.cpp"
|
"${VIDEO_CORE}/shader/track.cpp"
|
||||||
|
"${VIDEO_CORE}/shader/transform_feedback.cpp"
|
||||||
|
"${VIDEO_CORE}/shader/transform_feedback.h"
|
||||||
# and also check that the scm_rev files haven't changed
|
# and also check that the scm_rev files haven't changed
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
|
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
|
||||||
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
|
"${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
|
||||||
|
@ -129,6 +129,8 @@ add_library(video_core STATIC
|
|||||||
shader/shader_ir.cpp
|
shader/shader_ir.cpp
|
||||||
shader/shader_ir.h
|
shader/shader_ir.h
|
||||||
shader/track.cpp
|
shader/track.cpp
|
||||||
|
shader/transform_feedback.cpp
|
||||||
|
shader/transform_feedback.h
|
||||||
surface.cpp
|
surface.cpp
|
||||||
surface.h
|
surface.h
|
||||||
texture_cache/format_lookup_table.cpp
|
texture_cache/format_lookup_table.cpp
|
||||||
|
@ -628,19 +628,26 @@ public:
|
|||||||
float depth_range_far;
|
float depth_range_far;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct alignas(32) TransformFeedbackBinding {
|
struct TransformFeedbackBinding {
|
||||||
u32 buffer_enable;
|
u32 buffer_enable;
|
||||||
u32 address_high;
|
u32 address_high;
|
||||||
u32 address_low;
|
u32 address_low;
|
||||||
s32 buffer_size;
|
s32 buffer_size;
|
||||||
s32 buffer_offset;
|
s32 buffer_offset;
|
||||||
|
INSERT_UNION_PADDING_WORDS(3);
|
||||||
|
|
||||||
|
GPUVAddr Address() const {
|
||||||
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||||
|
address_low);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
static_assert(sizeof(TransformFeedbackBinding) == 32);
|
static_assert(sizeof(TransformFeedbackBinding) == 32);
|
||||||
|
|
||||||
struct alignas(16) TransformFeedbackLayout {
|
struct TransformFeedbackLayout {
|
||||||
u32 stream;
|
u32 stream;
|
||||||
u32 varying_count;
|
u32 varying_count;
|
||||||
u32 stride;
|
u32 stride;
|
||||||
|
INSERT_UNION_PADDING_WORDS(1);
|
||||||
};
|
};
|
||||||
static_assert(sizeof(TransformFeedbackLayout) == 16);
|
static_assert(sizeof(TransformFeedbackLayout) == 16);
|
||||||
|
|
||||||
@ -652,6 +659,10 @@ public:
|
|||||||
return shader_config[index].enable != 0;
|
return shader_config[index].enable != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsShaderConfigEnabled(Regs::ShaderProgram type) const {
|
||||||
|
return IsShaderConfigEnabled(static_cast<std::size_t>(type));
|
||||||
|
}
|
||||||
|
|
||||||
union {
|
union {
|
||||||
struct {
|
struct {
|
||||||
INSERT_UNION_PADDING_WORDS(0x45);
|
INSERT_UNION_PADDING_WORDS(0x45);
|
||||||
|
@ -496,7 +496,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||||||
SyncCullMode();
|
SyncCullMode();
|
||||||
SyncPrimitiveRestart();
|
SyncPrimitiveRestart();
|
||||||
SyncScissorTest();
|
SyncScissorTest();
|
||||||
SyncTransformFeedback();
|
|
||||||
SyncPointState();
|
SyncPointState();
|
||||||
SyncPolygonOffset();
|
SyncPolygonOffset();
|
||||||
SyncAlphaTest();
|
SyncAlphaTest();
|
||||||
@ -569,7 +568,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||||||
glTextureBarrier();
|
glTextureBarrier();
|
||||||
}
|
}
|
||||||
|
|
||||||
++num_queued_commands;
|
BeginTransformFeedback(primitive_mode);
|
||||||
|
|
||||||
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
|
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
|
||||||
const GLsizei num_instances =
|
const GLsizei num_instances =
|
||||||
@ -608,6 +607,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||||||
num_instances, base_instance);
|
num_instances, base_instance);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EndTransformFeedback();
|
||||||
|
|
||||||
|
++num_queued_commands;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
@ -1290,11 +1293,6 @@ void RasterizerOpenGL::SyncScissorTest() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SyncTransformFeedback() {
|
|
||||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
|
||||||
UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
void RasterizerOpenGL::SyncPointState() {
|
void RasterizerOpenGL::SyncPointState() {
|
||||||
auto& gpu = system.GPU().Maxwell3D();
|
auto& gpu = system.GPU().Maxwell3D();
|
||||||
auto& flags = gpu.dirty.flags;
|
auto& flags = gpu.dirty.flags;
|
||||||
@ -1370,4 +1368,62 @@ void RasterizerOpenGL::SyncFramebufferSRGB() {
|
|||||||
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
|
oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
|
||||||
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
|
if (regs.tfb_enabled == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
||||||
|
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
||||||
|
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
|
||||||
|
|
||||||
|
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
|
||||||
|
const auto& binding = regs.tfb_bindings[index];
|
||||||
|
if (!binding.buffer_enable) {
|
||||||
|
if (enabled_transform_feedback_buffers[index]) {
|
||||||
|
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
|
||||||
|
0);
|
||||||
|
}
|
||||||
|
enabled_transform_feedback_buffers[index] = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
enabled_transform_feedback_buffers[index] = true;
|
||||||
|
|
||||||
|
auto& tfb_buffer = transform_feedback_buffers[index];
|
||||||
|
tfb_buffer.Create();
|
||||||
|
|
||||||
|
const GLuint handle = tfb_buffer.handle;
|
||||||
|
const std::size_t size = binding.buffer_size;
|
||||||
|
glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
|
||||||
|
glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
|
||||||
|
static_cast<GLsizeiptr>(size));
|
||||||
|
}
|
||||||
|
|
||||||
|
glBeginTransformFeedback(GL_POINTS);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::EndTransformFeedback() {
|
||||||
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
|
if (regs.tfb_enabled == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
glEndTransformFeedback();
|
||||||
|
|
||||||
|
for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
|
||||||
|
const auto& binding = regs.tfb_bindings[index];
|
||||||
|
if (!binding.buffer_enable) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
|
||||||
|
|
||||||
|
const GLuint handle = transform_feedback_buffers[index].handle;
|
||||||
|
const GPUVAddr gpu_addr = binding.Address();
|
||||||
|
const std::size_t size = binding.buffer_size;
|
||||||
|
const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
||||||
|
glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
@ -168,9 +168,6 @@ private:
|
|||||||
/// Syncs the scissor test state to match the guest state
|
/// Syncs the scissor test state to match the guest state
|
||||||
void SyncScissorTest();
|
void SyncScissorTest();
|
||||||
|
|
||||||
/// Syncs the transform feedback state to match the guest state
|
|
||||||
void SyncTransformFeedback();
|
|
||||||
|
|
||||||
/// Syncs the point state to match the guest state
|
/// Syncs the point state to match the guest state
|
||||||
void SyncPointState();
|
void SyncPointState();
|
||||||
|
|
||||||
@ -192,6 +189,12 @@ private:
|
|||||||
/// Syncs the framebuffer sRGB state to match the guest state
|
/// Syncs the framebuffer sRGB state to match the guest state
|
||||||
void SyncFramebufferSRGB();
|
void SyncFramebufferSRGB();
|
||||||
|
|
||||||
|
/// Begin a transform feedback
|
||||||
|
void BeginTransformFeedback(GLenum primitive_mode);
|
||||||
|
|
||||||
|
/// End a transform feedback
|
||||||
|
void EndTransformFeedback();
|
||||||
|
|
||||||
/// Check for extension that are not strictly required but are needed for correct emulation
|
/// Check for extension that are not strictly required but are needed for correct emulation
|
||||||
void CheckExtensions();
|
void CheckExtensions();
|
||||||
|
|
||||||
@ -229,6 +232,11 @@ private:
|
|||||||
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
|
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
|
||||||
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
|
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
|
||||||
|
|
||||||
|
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
||||||
|
transform_feedback_buffers;
|
||||||
|
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
||||||
|
enabled_transform_feedback_buffers;
|
||||||
|
|
||||||
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
||||||
std::size_t num_queued_commands = 0;
|
std::size_t num_queued_commands = 0;
|
||||||
|
|
||||||
|
@ -23,6 +23,7 @@
|
|||||||
#include "video_core/shader/ast.h"
|
#include "video_core/shader/ast.h"
|
||||||
#include "video_core/shader/node.h"
|
#include "video_core/shader/node.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
#include "video_core/shader/transform_feedback.h"
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
@ -36,6 +37,7 @@ using Tegra::Shader::IpaInterpMode;
|
|||||||
using Tegra::Shader::IpaMode;
|
using Tegra::Shader::IpaMode;
|
||||||
using Tegra::Shader::IpaSampleMode;
|
using Tegra::Shader::IpaSampleMode;
|
||||||
using Tegra::Shader::Register;
|
using Tegra::Shader::Register;
|
||||||
|
using VideoCommon::Shader::BuildTransformFeedback;
|
||||||
using VideoCommon::Shader::Registry;
|
using VideoCommon::Shader::Registry;
|
||||||
|
|
||||||
using namespace std::string_literals;
|
using namespace std::string_literals;
|
||||||
@ -49,6 +51,11 @@ class ExprDecompiler;
|
|||||||
|
|
||||||
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
|
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
|
||||||
|
|
||||||
|
constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
|
||||||
|
|
||||||
|
constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
|
||||||
|
constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
|
||||||
|
|
||||||
struct TextureOffset {};
|
struct TextureOffset {};
|
||||||
struct TextureDerivates {};
|
struct TextureDerivates {};
|
||||||
using TextureArgument = std::pair<Type, Node>;
|
using TextureArgument = std::pair<Type, Node>;
|
||||||
@ -390,12 +397,22 @@ std::string FlowStackTopName(MetaStackClass stack) {
|
|||||||
return stage == ShaderType::Vertex;
|
return stage == ShaderType::Vertex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct GenericVaryingDescription {
|
||||||
|
std::string name;
|
||||||
|
u8 first_element = 0;
|
||||||
|
bool is_scalar = false;
|
||||||
|
};
|
||||||
|
|
||||||
class GLSLDecompiler final {
|
class GLSLDecompiler final {
|
||||||
public:
|
public:
|
||||||
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
|
||||||
ShaderType stage, std::string_view identifier, std::string_view suffix)
|
ShaderType stage, std::string_view identifier, std::string_view suffix)
|
||||||
: device{device}, ir{ir}, registry{registry}, stage{stage},
|
: device{device}, ir{ir}, registry{registry}, stage{stage},
|
||||||
identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {}
|
identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
|
||||||
|
if (stage != ShaderType::Compute) {
|
||||||
|
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Decompile() {
|
void Decompile() {
|
||||||
DeclareHeader();
|
DeclareHeader();
|
||||||
@ -403,17 +420,17 @@ public:
|
|||||||
DeclareGeometry();
|
DeclareGeometry();
|
||||||
DeclareFragment();
|
DeclareFragment();
|
||||||
DeclareCompute();
|
DeclareCompute();
|
||||||
DeclareRegisters();
|
|
||||||
DeclareCustomVariables();
|
|
||||||
DeclarePredicates();
|
|
||||||
DeclareLocalMemory();
|
|
||||||
DeclareInternalFlags();
|
|
||||||
DeclareInputAttributes();
|
DeclareInputAttributes();
|
||||||
DeclareOutputAttributes();
|
DeclareOutputAttributes();
|
||||||
DeclareConstantBuffers();
|
|
||||||
DeclareGlobalMemory();
|
|
||||||
DeclareSamplers();
|
|
||||||
DeclareImages();
|
DeclareImages();
|
||||||
|
DeclareSamplers();
|
||||||
|
DeclareGlobalMemory();
|
||||||
|
DeclareConstantBuffers();
|
||||||
|
DeclareLocalMemory();
|
||||||
|
DeclareRegisters();
|
||||||
|
DeclarePredicates();
|
||||||
|
DeclareInternalFlags();
|
||||||
|
DeclareCustomVariables();
|
||||||
DeclarePhysicalAttributeReader();
|
DeclarePhysicalAttributeReader();
|
||||||
|
|
||||||
code.AddLine("void main() {{");
|
code.AddLine("void main() {{");
|
||||||
@ -485,7 +502,7 @@ private:
|
|||||||
if (!identifier.empty()) {
|
if (!identifier.empty()) {
|
||||||
code.AddLine("// {}", identifier);
|
code.AddLine("// {}", identifier);
|
||||||
}
|
}
|
||||||
code.AddLine("#version 430 core");
|
code.AddLine("#version 440 core");
|
||||||
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
|
code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
|
||||||
if (device.HasShaderBallot()) {
|
if (device.HasShaderBallot()) {
|
||||||
code.AddLine("#extension GL_ARB_shader_ballot : require");
|
code.AddLine("#extension GL_ARB_shader_ballot : require");
|
||||||
@ -570,7 +587,13 @@ private:
|
|||||||
code.AddLine("out gl_PerVertex {{");
|
code.AddLine("out gl_PerVertex {{");
|
||||||
++code.scope;
|
++code.scope;
|
||||||
|
|
||||||
code.AddLine("vec4 gl_Position;");
|
auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
|
||||||
|
if (!pos_xfb.empty()) {
|
||||||
|
pos_xfb = fmt::format("layout ({}) ", pos_xfb);
|
||||||
|
}
|
||||||
|
const char* pos_type =
|
||||||
|
FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
|
||||||
|
code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
|
||||||
|
|
||||||
for (const auto attribute : ir.GetOutputAttributes()) {
|
for (const auto attribute : ir.GetOutputAttributes()) {
|
||||||
if (attribute == Attribute::Index::ClipDistances0123 ||
|
if (attribute == Attribute::Index::ClipDistances0123 ||
|
||||||
@ -703,7 +726,7 @@ private:
|
|||||||
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
|
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
|
||||||
const u32 location{GetGenericAttributeIndex(index)};
|
const u32 location{GetGenericAttributeIndex(index)};
|
||||||
|
|
||||||
std::string name{GetInputAttribute(index)};
|
std::string name{GetGenericInputAttribute(index)};
|
||||||
if (stage == ShaderType::Geometry) {
|
if (stage == ShaderType::Geometry) {
|
||||||
name = "gs_" + name + "[]";
|
name = "gs_" + name + "[]";
|
||||||
}
|
}
|
||||||
@ -740,9 +763,59 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
|
||||||
|
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
|
||||||
|
const auto it = transform_feedback.find(location);
|
||||||
|
if (it == transform_feedback.end()) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
return it->second.components;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
|
||||||
|
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
|
||||||
|
const auto it = transform_feedback.find(location);
|
||||||
|
if (it == transform_feedback.end()) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
const VaryingTFB& tfb = it->second;
|
||||||
|
return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
|
||||||
|
tfb.offset, tfb.stride);
|
||||||
|
}
|
||||||
|
|
||||||
void DeclareOutputAttribute(Attribute::Index index) {
|
void DeclareOutputAttribute(Attribute::Index index) {
|
||||||
const u32 location{GetGenericAttributeIndex(index)};
|
static constexpr std::string_view swizzle = "xyzw";
|
||||||
code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
|
u8 element = 0;
|
||||||
|
while (element < 4) {
|
||||||
|
auto xfb = GetTransformFeedbackDecoration(index, element);
|
||||||
|
if (!xfb.empty()) {
|
||||||
|
xfb = fmt::format(", {}", xfb);
|
||||||
|
}
|
||||||
|
const std::size_t remainder = 4 - element;
|
||||||
|
const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
|
||||||
|
const char* const type = FLOAT_TYPES.at(num_components - 1);
|
||||||
|
|
||||||
|
const u32 location = GetGenericAttributeIndex(index);
|
||||||
|
|
||||||
|
GenericVaryingDescription description;
|
||||||
|
description.first_element = static_cast<u8>(element);
|
||||||
|
description.is_scalar = num_components == 1;
|
||||||
|
description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
|
||||||
|
if (element != 0 || num_components != 4) {
|
||||||
|
const std::string_view name_swizzle = swizzle.substr(element, num_components);
|
||||||
|
description.name = fmt::format("{}_{}", description.name, name_swizzle);
|
||||||
|
}
|
||||||
|
for (std::size_t i = 0; i < num_components; ++i) {
|
||||||
|
const u8 offset = static_cast<u8>(location * 4 + element + i);
|
||||||
|
varying_description.insert({offset, description});
|
||||||
|
}
|
||||||
|
|
||||||
|
code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
|
||||||
|
xfb, type, description.name);
|
||||||
|
|
||||||
|
element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeclareConstantBuffers() {
|
void DeclareConstantBuffers() {
|
||||||
@ -1095,7 +1168,7 @@ private:
|
|||||||
return {"0", Type::Int};
|
return {"0", Type::Int};
|
||||||
default:
|
default:
|
||||||
if (IsGenericAttribute(attribute)) {
|
if (IsGenericAttribute(attribute)) {
|
||||||
return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
|
return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
|
||||||
Type::Float};
|
Type::Float};
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -1164,8 +1237,7 @@ private:
|
|||||||
return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
|
return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
|
||||||
default:
|
default:
|
||||||
if (IsGenericAttribute(attribute)) {
|
if (IsGenericAttribute(attribute)) {
|
||||||
return {
|
return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}};
|
||||||
{GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
|
|
||||||
}
|
}
|
||||||
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
|
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
|
||||||
return {};
|
return {};
|
||||||
@ -2376,27 +2448,34 @@ private:
|
|||||||
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
|
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
|
||||||
|
|
||||||
std::string GetRegister(u32 index) const {
|
std::string GetRegister(u32 index) const {
|
||||||
return GetDeclarationWithSuffix(index, "gpr");
|
return AppendSuffix(index, "gpr");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GetCustomVariable(u32 index) const {
|
std::string GetCustomVariable(u32 index) const {
|
||||||
return GetDeclarationWithSuffix(index, "custom_var");
|
return AppendSuffix(index, "custom_var");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GetPredicate(Tegra::Shader::Pred pred) const {
|
std::string GetPredicate(Tegra::Shader::Pred pred) const {
|
||||||
return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
|
return AppendSuffix(static_cast<u32>(pred), "pred");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GetInputAttribute(Attribute::Index attribute) const {
|
std::string GetGenericInputAttribute(Attribute::Index attribute) const {
|
||||||
return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr");
|
return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GetOutputAttribute(Attribute::Index attribute) const {
|
std::unordered_map<u8, GenericVaryingDescription> varying_description;
|
||||||
return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr");
|
|
||||||
|
std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
|
||||||
|
const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
|
||||||
|
const auto& description = varying_description.at(offset);
|
||||||
|
if (description.is_scalar) {
|
||||||
|
return description.name;
|
||||||
|
}
|
||||||
|
return fmt::format("{}[{}]", description.name, element - description.first_element);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GetConstBuffer(u32 index) const {
|
std::string GetConstBuffer(u32 index) const {
|
||||||
return GetDeclarationWithSuffix(index, "cbuf");
|
return AppendSuffix(index, "cbuf");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
|
std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
|
||||||
@ -2409,7 +2488,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string GetConstBufferBlock(u32 index) const {
|
std::string GetConstBufferBlock(u32 index) const {
|
||||||
return GetDeclarationWithSuffix(index, "cbuf_block");
|
return AppendSuffix(index, "cbuf_block");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GetLocalMemory() const {
|
std::string GetLocalMemory() const {
|
||||||
@ -2434,14 +2513,14 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string GetSampler(const Sampler& sampler) const {
|
std::string GetSampler(const Sampler& sampler) const {
|
||||||
return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
|
return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GetImage(const Image& image) const {
|
std::string GetImage(const Image& image) const {
|
||||||
return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
|
return AppendSuffix(static_cast<u32>(image.GetIndex()), "image");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
|
std::string AppendSuffix(u32 index, std::string_view name) const {
|
||||||
if (suffix.empty()) {
|
if (suffix.empty()) {
|
||||||
return fmt::format("{}{}", name, index);
|
return fmt::format("{}{}", name, index);
|
||||||
} else {
|
} else {
|
||||||
@ -2477,6 +2556,7 @@ private:
|
|||||||
const std::string_view identifier;
|
const std::string_view identifier;
|
||||||
const std::string_view suffix;
|
const std::string_view suffix;
|
||||||
const Header header;
|
const Header header;
|
||||||
|
std::unordered_map<u8, VaryingTFB> transform_feedback;
|
||||||
|
|
||||||
ShaderWriter code;
|
ShaderWriter code;
|
||||||
|
|
||||||
|
@ -107,8 +107,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
|||||||
features.occlusionQueryPrecise = true;
|
features.occlusionQueryPrecise = true;
|
||||||
features.fragmentStoresAndAtomics = true;
|
features.fragmentStoresAndAtomics = true;
|
||||||
features.shaderImageGatherExtended = true;
|
features.shaderImageGatherExtended = true;
|
||||||
features.shaderStorageImageReadWithoutFormat =
|
features.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported;
|
||||||
is_shader_storage_img_read_without_format_supported;
|
|
||||||
features.shaderStorageImageWriteWithoutFormat = true;
|
features.shaderStorageImageWriteWithoutFormat = true;
|
||||||
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
|
features.textureCompressionASTC_LDR = is_optimal_astc_supported;
|
||||||
|
|
||||||
@ -148,6 +147,15 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
|||||||
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
|
LOG_INFO(Render_Vulkan, "Device doesn't support uint8 indexes");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
vk::PhysicalDeviceTransformFeedbackFeaturesEXT transform_feedback;
|
||||||
|
if (ext_transform_feedback) {
|
||||||
|
transform_feedback.transformFeedback = true;
|
||||||
|
transform_feedback.geometryStreams = true;
|
||||||
|
SetNext(next, transform_feedback);
|
||||||
|
} else {
|
||||||
|
LOG_INFO(Render_Vulkan, "Device doesn't support transform feedbacks");
|
||||||
|
}
|
||||||
|
|
||||||
if (!ext_depth_range_unrestricted) {
|
if (!ext_depth_range_unrestricted) {
|
||||||
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
|
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
|
||||||
}
|
}
|
||||||
@ -385,7 +393,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
extensions.reserve(14);
|
extensions.reserve(15);
|
||||||
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
|
||||||
@ -397,18 +405,22 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
|||||||
|
|
||||||
[[maybe_unused]] const bool nsight =
|
[[maybe_unused]] const bool nsight =
|
||||||
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
|
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
|
||||||
bool khr_shader_float16_int8{};
|
bool has_khr_shader_float16_int8{};
|
||||||
bool ext_subgroup_size_control{};
|
bool has_ext_subgroup_size_control{};
|
||||||
|
bool has_ext_transform_feedback{};
|
||||||
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
|
for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
|
||||||
Test(extension, khr_uniform_buffer_standard_layout,
|
Test(extension, khr_uniform_buffer_standard_layout,
|
||||||
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
|
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
|
||||||
Test(extension, khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
|
Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
|
||||||
|
false);
|
||||||
Test(extension, ext_depth_range_unrestricted,
|
Test(extension, ext_depth_range_unrestricted,
|
||||||
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
|
VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
|
||||||
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
|
Test(extension, ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
|
||||||
Test(extension, ext_shader_viewport_index_layer,
|
Test(extension, ext_shader_viewport_index_layer,
|
||||||
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
|
VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME, true);
|
||||||
Test(extension, ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
|
Test(extension, has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME,
|
||||||
|
false);
|
||||||
|
Test(extension, has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME,
|
||||||
false);
|
false);
|
||||||
if (Settings::values.renderer_debug) {
|
if (Settings::values.renderer_debug) {
|
||||||
Test(extension, nv_device_diagnostic_checkpoints,
|
Test(extension, nv_device_diagnostic_checkpoints,
|
||||||
@ -416,13 +428,13 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (khr_shader_float16_int8) {
|
if (has_khr_shader_float16_int8) {
|
||||||
is_float16_supported =
|
is_float16_supported =
|
||||||
GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
|
GetFeatures<vk::PhysicalDeviceFloat16Int8FeaturesKHR>(physical, dldi).shaderFloat16;
|
||||||
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ext_subgroup_size_control) {
|
if (has_ext_subgroup_size_control) {
|
||||||
const auto features =
|
const auto features =
|
||||||
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
|
GetFeatures<vk::PhysicalDeviceSubgroupSizeControlFeaturesEXT>(physical, dldi);
|
||||||
const auto properties =
|
const auto properties =
|
||||||
@ -439,6 +451,20 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
|||||||
is_warp_potentially_bigger = true;
|
is_warp_potentially_bigger = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (has_ext_transform_feedback) {
|
||||||
|
const auto features =
|
||||||
|
GetFeatures<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>(physical, dldi);
|
||||||
|
const auto properties =
|
||||||
|
GetProperties<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>(physical, dldi);
|
||||||
|
|
||||||
|
if (features.transformFeedback && features.geometryStreams &&
|
||||||
|
properties.maxTransformFeedbackStreams >= 4 && properties.maxTransformFeedbackBuffers &&
|
||||||
|
properties.transformFeedbackQueries && properties.transformFeedbackDraw) {
|
||||||
|
extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME);
|
||||||
|
ext_transform_feedback = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return extensions;
|
return extensions;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -467,8 +493,7 @@ void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceK
|
|||||||
|
|
||||||
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
|
void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
|
||||||
const auto supported_features{physical.getFeatures(dldi)};
|
const auto supported_features{physical.getFeatures(dldi)};
|
||||||
is_shader_storage_img_read_without_format_supported =
|
is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
|
||||||
supported_features.shaderStorageImageReadWithoutFormat;
|
|
||||||
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
|
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -122,11 +122,6 @@ public:
|
|||||||
return properties.limits.maxPushConstantsSize;
|
return properties.limits.maxPushConstantsSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if Shader storage Image Read Without Format supported.
|
|
||||||
bool IsShaderStorageImageReadWithoutFormatSupported() const {
|
|
||||||
return is_shader_storage_img_read_without_format_supported;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true if ASTC is natively supported.
|
/// Returns true if ASTC is natively supported.
|
||||||
bool IsOptimalAstcSupported() const {
|
bool IsOptimalAstcSupported() const {
|
||||||
return is_optimal_astc_supported;
|
return is_optimal_astc_supported;
|
||||||
@ -147,6 +142,11 @@ public:
|
|||||||
return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
|
return (guest_warp_stages & stage) != vk::ShaderStageFlags{};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if formatless image load is supported.
|
||||||
|
bool IsFormatlessImageLoadSupported() const {
|
||||||
|
return is_formatless_image_load_supported;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if the device supports VK_EXT_scalar_block_layout.
|
/// Returns true if the device supports VK_EXT_scalar_block_layout.
|
||||||
bool IsKhrUniformBufferStandardLayoutSupported() const {
|
bool IsKhrUniformBufferStandardLayoutSupported() const {
|
||||||
return khr_uniform_buffer_standard_layout;
|
return khr_uniform_buffer_standard_layout;
|
||||||
@ -167,6 +167,11 @@ public:
|
|||||||
return ext_shader_viewport_index_layer;
|
return ext_shader_viewport_index_layer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if the device supports VK_EXT_transform_feedback.
|
||||||
|
bool IsExtTransformFeedbackSupported() const {
|
||||||
|
return ext_transform_feedback;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
|
/// Returns true if the device supports VK_NV_device_diagnostic_checkpoints.
|
||||||
bool IsNvDeviceDiagnosticCheckpoints() const {
|
bool IsNvDeviceDiagnosticCheckpoints() const {
|
||||||
return nv_device_diagnostic_checkpoints;
|
return nv_device_diagnostic_checkpoints;
|
||||||
@ -214,26 +219,26 @@ private:
|
|||||||
static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
|
static std::unordered_map<vk::Format, vk::FormatProperties> GetFormatProperties(
|
||||||
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
|
const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
|
||||||
|
|
||||||
const vk::PhysicalDevice physical; ///< Physical device.
|
const vk::PhysicalDevice physical; ///< Physical device.
|
||||||
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
|
vk::DispatchLoaderDynamic dld; ///< Device function pointers.
|
||||||
vk::PhysicalDeviceProperties properties; ///< Device properties.
|
vk::PhysicalDeviceProperties properties; ///< Device properties.
|
||||||
UniqueDevice logical; ///< Logical device.
|
UniqueDevice logical; ///< Logical device.
|
||||||
vk::Queue graphics_queue; ///< Main graphics queue.
|
vk::Queue graphics_queue; ///< Main graphics queue.
|
||||||
vk::Queue present_queue; ///< Main present queue.
|
vk::Queue present_queue; ///< Main present queue.
|
||||||
u32 graphics_family{}; ///< Main graphics queue family index.
|
u32 graphics_family{}; ///< Main graphics queue family index.
|
||||||
u32 present_family{}; ///< Main present queue family index.
|
u32 present_family{}; ///< Main present queue family index.
|
||||||
vk::DriverIdKHR driver_id{}; ///< Driver ID.
|
vk::DriverIdKHR driver_id{}; ///< Driver ID.
|
||||||
vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
|
vk::ShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
|
||||||
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
|
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
|
||||||
bool is_float16_supported{}; ///< Support for float16 arithmetics.
|
bool is_float16_supported{}; ///< Support for float16 arithmetics.
|
||||||
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
||||||
|
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
|
||||||
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
|
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
|
||||||
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
|
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
|
||||||
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
|
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
|
||||||
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
|
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
|
||||||
|
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
|
||||||
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
|
bool nv_device_diagnostic_checkpoints{}; ///< Support for VK_NV_device_diagnostic_checkpoints.
|
||||||
bool is_shader_storage_img_read_without_format_supported{}; ///< Support for shader storage
|
|
||||||
///< image read without format
|
|
||||||
|
|
||||||
// Telemetry parameters
|
// Telemetry parameters
|
||||||
std::string vendor_name; ///< Device's driver name.
|
std::string vendor_name; ///< Device's driver name.
|
||||||
|
@ -273,9 +273,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
|
|||||||
specialization.workgroup_size = key.workgroup_size;
|
specialization.workgroup_size = key.workgroup_size;
|
||||||
specialization.shared_memory_size = key.shared_memory_size;
|
specialization.shared_memory_size = key.shared_memory_size;
|
||||||
|
|
||||||
const SPIRVShader spirv_shader{
|
const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
|
||||||
Decompile(device, shader->GetIR(), ShaderType::Compute, specialization),
|
shader->GetRegistry(), specialization),
|
||||||
shader->GetEntries()};
|
shader->GetEntries()};
|
||||||
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
|
entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
|
||||||
update_descriptor_queue, spirv_shader);
|
update_descriptor_queue, spirv_shader);
|
||||||
return *entry;
|
return *entry;
|
||||||
@ -324,8 +324,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
|
|||||||
const auto& gpu = system.GPU().Maxwell3D();
|
const auto& gpu = system.GPU().Maxwell3D();
|
||||||
|
|
||||||
Specialization specialization;
|
Specialization specialization;
|
||||||
specialization.primitive_topology = fixed_state.input_assembly.topology;
|
if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) {
|
||||||
if (specialization.primitive_topology == Maxwell::PrimitiveTopology::Points) {
|
|
||||||
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
|
ASSERT(fixed_state.input_assembly.point_size != 0.0f);
|
||||||
specialization.point_size = fixed_state.input_assembly.point_size;
|
specialization.point_size = fixed_state.input_assembly.point_size;
|
||||||
}
|
}
|
||||||
@ -333,9 +332,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
|
|||||||
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
|
specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type;
|
||||||
}
|
}
|
||||||
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
|
specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one;
|
||||||
specialization.tessellation.primitive = fixed_state.tessellation.primitive;
|
|
||||||
specialization.tessellation.spacing = fixed_state.tessellation.spacing;
|
|
||||||
specialization.tessellation.clockwise = fixed_state.tessellation.clockwise;
|
|
||||||
|
|
||||||
SPIRVProgram program;
|
SPIRVProgram program;
|
||||||
std::vector<vk::DescriptorSetLayoutBinding> bindings;
|
std::vector<vk::DescriptorSetLayoutBinding> bindings;
|
||||||
@ -356,8 +352,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
|
|||||||
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
|
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
|
||||||
const auto program_type = GetShaderType(program_enum);
|
const auto program_type = GetShaderType(program_enum);
|
||||||
const auto& entries = shader->GetEntries();
|
const auto& entries = shader->GetEntries();
|
||||||
program[stage] = {Decompile(device, shader->GetIR(), program_type, specialization),
|
program[stage] = {
|
||||||
entries};
|
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
|
||||||
|
entries};
|
||||||
|
|
||||||
if (program_enum == Maxwell::ShaderProgram::VertexA) {
|
if (program_enum == Maxwell::ShaderProgram::VertexA) {
|
||||||
// VertexB was combined with VertexA, so we skip the VertexB iteration
|
// VertexB was combined with VertexA, so we skip the VertexB iteration
|
||||||
|
@ -132,6 +132,10 @@ public:
|
|||||||
return shader_ir;
|
return shader_ir;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const VideoCommon::Shader::Registry& GetRegistry() const {
|
||||||
|
return registry;
|
||||||
|
}
|
||||||
|
|
||||||
const VideoCommon::Shader::ShaderIR& GetIR() const {
|
const VideoCommon::Shader::ShaderIR& GetIR() const {
|
||||||
return shader_ir;
|
return shader_ir;
|
||||||
}
|
}
|
||||||
|
@ -347,6 +347,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
|||||||
[&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
|
[&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BeginTransformFeedback();
|
||||||
|
|
||||||
const auto pipeline_layout = pipeline.GetLayout();
|
const auto pipeline_layout = pipeline.GetLayout();
|
||||||
const auto descriptor_set = pipeline.CommitDescriptorSet();
|
const auto descriptor_set = pipeline.CommitDescriptorSet();
|
||||||
scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
|
scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) {
|
||||||
@ -356,6 +358,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
|||||||
}
|
}
|
||||||
draw_params.Draw(cmdbuf, dld);
|
draw_params.Draw(cmdbuf, dld);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
EndTransformFeedback();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::Clear() {
|
void RasterizerVulkan::Clear() {
|
||||||
@ -738,6 +742,44 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
|||||||
UpdateStencilFaces(regs);
|
UpdateStencilFaces(regs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerVulkan::BeginTransformFeedback() {
|
||||||
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
|
if (regs.tfb_enabled == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
|
||||||
|
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
|
||||||
|
regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
|
||||||
|
|
||||||
|
UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable);
|
||||||
|
UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable);
|
||||||
|
UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable);
|
||||||
|
|
||||||
|
const auto& binding = regs.tfb_bindings[0];
|
||||||
|
UNIMPLEMENTED_IF(binding.buffer_enable == 0);
|
||||||
|
UNIMPLEMENTED_IF(binding.buffer_offset != 0);
|
||||||
|
|
||||||
|
const GPUVAddr gpu_addr = binding.Address();
|
||||||
|
const std::size_t size = binding.buffer_size;
|
||||||
|
const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
|
||||||
|
|
||||||
|
scheduler.Record([buffer = *buffer, offset = offset, size](auto cmdbuf, auto& dld) {
|
||||||
|
cmdbuf.bindTransformFeedbackBuffersEXT(0, {buffer}, {offset}, {size}, dld);
|
||||||
|
cmdbuf.beginTransformFeedbackEXT(0, {}, {}, dld);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerVulkan::EndTransformFeedback() {
|
||||||
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
|
if (regs.tfb_enabled == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
scheduler.Record(
|
||||||
|
[](auto cmdbuf, auto& dld) { cmdbuf.endTransformFeedbackEXT(0, {}, {}, dld); });
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
|
void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
|
||||||
BufferBindings& buffer_bindings) {
|
BufferBindings& buffer_bindings) {
|
||||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
|
@ -169,6 +169,10 @@ private:
|
|||||||
|
|
||||||
void UpdateDynamicStates();
|
void UpdateDynamicStates();
|
||||||
|
|
||||||
|
void BeginTransformFeedback();
|
||||||
|
|
||||||
|
void EndTransformFeedback();
|
||||||
|
|
||||||
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
|
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
|
||||||
|
|
||||||
void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
|
void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
|
||||||
|
@ -5,7 +5,9 @@
|
|||||||
#include <functional>
|
#include <functional>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <optional>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
#include <unordered_map>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
@ -24,6 +26,7 @@
|
|||||||
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
|
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
|
||||||
#include "video_core/shader/node.h"
|
#include "video_core/shader/node.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
#include "video_core/shader/transform_feedback.h"
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
@ -93,6 +96,12 @@ struct VertexIndices {
|
|||||||
std::optional<u32> clip_distances;
|
std::optional<u32> clip_distances;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct GenericVaryingDescription {
|
||||||
|
Id id = nullptr;
|
||||||
|
u32 first_element = 0;
|
||||||
|
bool is_scalar = false;
|
||||||
|
};
|
||||||
|
|
||||||
spv::Dim GetSamplerDim(const Sampler& sampler) {
|
spv::Dim GetSamplerDim(const Sampler& sampler) {
|
||||||
ASSERT(!sampler.IsBuffer());
|
ASSERT(!sampler.IsBuffer());
|
||||||
switch (sampler.GetType()) {
|
switch (sampler.GetType()) {
|
||||||
@ -266,9 +275,13 @@ bool IsPrecise(Operation operand) {
|
|||||||
class SPIRVDecompiler final : public Sirit::Module {
|
class SPIRVDecompiler final : public Sirit::Module {
|
||||||
public:
|
public:
|
||||||
explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
|
explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderType stage,
|
||||||
const Specialization& specialization)
|
const Registry& registry, const Specialization& specialization)
|
||||||
: Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
|
: Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()},
|
||||||
specialization{specialization} {
|
registry{registry}, specialization{specialization} {
|
||||||
|
if (stage != ShaderType::Compute) {
|
||||||
|
transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
|
||||||
|
}
|
||||||
|
|
||||||
AddCapability(spv::Capability::Shader);
|
AddCapability(spv::Capability::Shader);
|
||||||
AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
|
AddCapability(spv::Capability::UniformAndStorageBuffer16BitAccess);
|
||||||
AddCapability(spv::Capability::ImageQuery);
|
AddCapability(spv::Capability::ImageQuery);
|
||||||
@ -286,6 +299,15 @@ public:
|
|||||||
AddExtension("SPV_KHR_variable_pointers");
|
AddExtension("SPV_KHR_variable_pointers");
|
||||||
AddExtension("SPV_KHR_shader_draw_parameters");
|
AddExtension("SPV_KHR_shader_draw_parameters");
|
||||||
|
|
||||||
|
if (!transform_feedback.empty()) {
|
||||||
|
if (device.IsExtTransformFeedbackSupported()) {
|
||||||
|
AddCapability(spv::Capability::TransformFeedback);
|
||||||
|
} else {
|
||||||
|
LOG_ERROR(Render_Vulkan, "Shader requires transform feedbacks but these are not "
|
||||||
|
"supported on this device");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (ir.UsesLayer() || ir.UsesViewportIndex()) {
|
if (ir.UsesLayer() || ir.UsesViewportIndex()) {
|
||||||
if (ir.UsesViewportIndex()) {
|
if (ir.UsesViewportIndex()) {
|
||||||
AddCapability(spv::Capability::MultiViewport);
|
AddCapability(spv::Capability::MultiViewport);
|
||||||
@ -296,7 +318,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (device.IsShaderStorageImageReadWithoutFormatSupported()) {
|
if (device.IsFormatlessImageLoadSupported()) {
|
||||||
AddCapability(spv::Capability::StorageImageReadWithoutFormat);
|
AddCapability(spv::Capability::StorageImageReadWithoutFormat);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -318,25 +340,29 @@ public:
|
|||||||
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
|
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
|
||||||
header.common2.threads_per_input_primitive);
|
header.common2.threads_per_input_primitive);
|
||||||
break;
|
break;
|
||||||
case ShaderType::TesselationEval:
|
case ShaderType::TesselationEval: {
|
||||||
|
const auto& info = registry.GetGraphicsInfo();
|
||||||
AddCapability(spv::Capability::Tessellation);
|
AddCapability(spv::Capability::Tessellation);
|
||||||
AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
|
AddEntryPoint(spv::ExecutionModel::TessellationEvaluation, main, "main", interfaces);
|
||||||
AddExecutionMode(main, GetExecutionMode(specialization.tessellation.primitive));
|
AddExecutionMode(main, GetExecutionMode(info.tessellation_primitive));
|
||||||
AddExecutionMode(main, GetExecutionMode(specialization.tessellation.spacing));
|
AddExecutionMode(main, GetExecutionMode(info.tessellation_spacing));
|
||||||
AddExecutionMode(main, specialization.tessellation.clockwise
|
AddExecutionMode(main, info.tessellation_clockwise
|
||||||
? spv::ExecutionMode::VertexOrderCw
|
? spv::ExecutionMode::VertexOrderCw
|
||||||
: spv::ExecutionMode::VertexOrderCcw);
|
: spv::ExecutionMode::VertexOrderCcw);
|
||||||
break;
|
break;
|
||||||
case ShaderType::Geometry:
|
}
|
||||||
|
case ShaderType::Geometry: {
|
||||||
|
const auto& info = registry.GetGraphicsInfo();
|
||||||
AddCapability(spv::Capability::Geometry);
|
AddCapability(spv::Capability::Geometry);
|
||||||
AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
|
AddEntryPoint(spv::ExecutionModel::Geometry, main, "main", interfaces);
|
||||||
AddExecutionMode(main, GetExecutionMode(specialization.primitive_topology));
|
AddExecutionMode(main, GetExecutionMode(info.primitive_topology));
|
||||||
AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
|
AddExecutionMode(main, GetExecutionMode(header.common3.output_topology));
|
||||||
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
|
AddExecutionMode(main, spv::ExecutionMode::OutputVertices,
|
||||||
header.common4.max_output_vertices);
|
header.common4.max_output_vertices);
|
||||||
// TODO(Rodrigo): Where can we get this info from?
|
// TODO(Rodrigo): Where can we get this info from?
|
||||||
AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
|
AddExecutionMode(main, spv::ExecutionMode::Invocations, 1U);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case ShaderType::Fragment:
|
case ShaderType::Fragment:
|
||||||
AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
|
AddEntryPoint(spv::ExecutionModel::Fragment, main, "main", interfaces);
|
||||||
AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
|
AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
|
||||||
@ -545,7 +571,8 @@ private:
|
|||||||
if (stage != ShaderType::Geometry) {
|
if (stage != ShaderType::Geometry) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const u32 num_input = GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
|
const auto& info = registry.GetGraphicsInfo();
|
||||||
|
const u32 num_input = GetNumPrimitiveTopologyVertices(info.primitive_topology);
|
||||||
DeclareInputVertexArray(num_input);
|
DeclareInputVertexArray(num_input);
|
||||||
DeclareOutputVertex();
|
DeclareOutputVertex();
|
||||||
}
|
}
|
||||||
@ -742,12 +769,34 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void DeclareOutputAttributes() {
|
void DeclareOutputAttributes() {
|
||||||
|
if (stage == ShaderType::Compute || stage == ShaderType::Fragment) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
UNIMPLEMENTED_IF(registry.GetGraphicsInfo().tfb_enabled && stage != ShaderType::Vertex);
|
||||||
for (const auto index : ir.GetOutputAttributes()) {
|
for (const auto index : ir.GetOutputAttributes()) {
|
||||||
if (!IsGenericAttribute(index)) {
|
if (!IsGenericAttribute(index)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const u32 location = GetGenericAttributeLocation(index);
|
DeclareOutputAttribute(index);
|
||||||
Id type = t_float4;
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DeclareOutputAttribute(Attribute::Index index) {
|
||||||
|
static constexpr std::string_view swizzle = "xyzw";
|
||||||
|
|
||||||
|
const u32 location = GetGenericAttributeLocation(index);
|
||||||
|
u8 element = 0;
|
||||||
|
while (element < 4) {
|
||||||
|
const std::size_t remainder = 4 - element;
|
||||||
|
|
||||||
|
std::size_t num_components = remainder;
|
||||||
|
const std::optional tfb = GetTransformFeedbackInfo(index, element);
|
||||||
|
if (tfb) {
|
||||||
|
num_components = tfb->components;
|
||||||
|
}
|
||||||
|
|
||||||
|
Id type = GetTypeVectorDefinitionLut(Type::Float).at(num_components - 1);
|
||||||
Id varying_default = v_varying_default;
|
Id varying_default = v_varying_default;
|
||||||
if (IsOutputAttributeArray()) {
|
if (IsOutputAttributeArray()) {
|
||||||
const u32 num = GetNumOutputVertices();
|
const u32 num = GetNumOutputVertices();
|
||||||
@ -760,15 +809,47 @@ private:
|
|||||||
}
|
}
|
||||||
type = TypePointer(spv::StorageClass::Output, type);
|
type = TypePointer(spv::StorageClass::Output, type);
|
||||||
|
|
||||||
|
std::string name = fmt::format("out_attr{}", location);
|
||||||
|
if (num_components < 4 || element > 0) {
|
||||||
|
name = fmt::format("{}_{}", name, swizzle.substr(element, num_components));
|
||||||
|
}
|
||||||
|
|
||||||
const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
|
const Id id = OpVariable(type, spv::StorageClass::Output, varying_default);
|
||||||
Name(AddGlobalVariable(id), fmt::format("out_attr{}", location));
|
Name(AddGlobalVariable(id), name);
|
||||||
output_attributes.emplace(index, id);
|
|
||||||
|
GenericVaryingDescription description;
|
||||||
|
description.id = id;
|
||||||
|
description.first_element = element;
|
||||||
|
description.is_scalar = num_components == 1;
|
||||||
|
for (u32 i = 0; i < num_components; ++i) {
|
||||||
|
const u8 offset = static_cast<u8>(static_cast<u32>(index) * 4 + element + i);
|
||||||
|
output_attributes.emplace(offset, description);
|
||||||
|
}
|
||||||
interfaces.push_back(id);
|
interfaces.push_back(id);
|
||||||
|
|
||||||
Decorate(id, spv::Decoration::Location, location);
|
Decorate(id, spv::Decoration::Location, location);
|
||||||
|
if (element > 0) {
|
||||||
|
Decorate(id, spv::Decoration::Component, static_cast<u32>(element));
|
||||||
|
}
|
||||||
|
if (tfb && device.IsExtTransformFeedbackSupported()) {
|
||||||
|
Decorate(id, spv::Decoration::XfbBuffer, static_cast<u32>(tfb->buffer));
|
||||||
|
Decorate(id, spv::Decoration::XfbStride, static_cast<u32>(tfb->stride));
|
||||||
|
Decorate(id, spv::Decoration::Offset, static_cast<u32>(tfb->offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
element += static_cast<u8>(num_components);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<VaryingTFB> GetTransformFeedbackInfo(Attribute::Index index, u8 element = 0) {
|
||||||
|
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
|
||||||
|
const auto it = transform_feedback.find(location);
|
||||||
|
if (it == transform_feedback.end()) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
|
||||||
u32 DeclareConstantBuffers(u32 binding) {
|
u32 DeclareConstantBuffers(u32 binding) {
|
||||||
for (const auto& [index, size] : ir.GetConstantBuffers()) {
|
for (const auto& [index, size] : ir.GetConstantBuffers()) {
|
||||||
const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
|
const Id type = device.IsKhrUniformBufferStandardLayoutSupported() ? t_cbuf_scalar_ubo
|
||||||
@ -898,7 +979,7 @@ private:
|
|||||||
u32 GetNumInputVertices() const {
|
u32 GetNumInputVertices() const {
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case ShaderType::Geometry:
|
case ShaderType::Geometry:
|
||||||
return GetNumPrimitiveTopologyVertices(specialization.primitive_topology);
|
return GetNumPrimitiveTopologyVertices(registry.GetGraphicsInfo().primitive_topology);
|
||||||
case ShaderType::TesselationControl:
|
case ShaderType::TesselationControl:
|
||||||
case ShaderType::TesselationEval:
|
case ShaderType::TesselationEval:
|
||||||
return NumInputPatches;
|
return NumInputPatches;
|
||||||
@ -1346,8 +1427,14 @@ private:
|
|||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
if (IsGenericAttribute(attribute)) {
|
if (IsGenericAttribute(attribute)) {
|
||||||
const Id composite = output_attributes.at(attribute);
|
const u8 offset = static_cast<u8>(static_cast<u8>(attribute) * 4 + element);
|
||||||
return {ArrayPass(t_out_float, composite, {element}), Type::Float};
|
const GenericVaryingDescription description = output_attributes.at(offset);
|
||||||
|
const Id composite = description.id;
|
||||||
|
std::vector<u32> indices;
|
||||||
|
if (!description.is_scalar) {
|
||||||
|
indices.push_back(element - description.first_element);
|
||||||
|
}
|
||||||
|
return {ArrayPass(t_out_float, composite, indices), Type::Float};
|
||||||
}
|
}
|
||||||
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
|
UNIMPLEMENTED_MSG("Unhandled output attribute: {}",
|
||||||
static_cast<u32>(attribute));
|
static_cast<u32>(attribute));
|
||||||
@ -1793,7 +1880,7 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
Expression ImageLoad(Operation operation) {
|
Expression ImageLoad(Operation operation) {
|
||||||
if (!device.IsShaderStorageImageReadWithoutFormatSupported()) {
|
if (!device.IsFormatlessImageLoadSupported()) {
|
||||||
return {v_float_zero, Type::Float};
|
return {v_float_zero, Type::Float};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2258,11 +2345,11 @@ private:
|
|||||||
std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
|
std::array<Id, 4> GetTypeVectorDefinitionLut(Type type) const {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case Type::Float:
|
case Type::Float:
|
||||||
return {nullptr, t_float2, t_float3, t_float4};
|
return {t_float, t_float2, t_float3, t_float4};
|
||||||
case Type::Int:
|
case Type::Int:
|
||||||
return {nullptr, t_int2, t_int3, t_int4};
|
return {t_int, t_int2, t_int3, t_int4};
|
||||||
case Type::Uint:
|
case Type::Uint:
|
||||||
return {nullptr, t_uint2, t_uint3, t_uint4};
|
return {t_uint, t_uint2, t_uint3, t_uint4};
|
||||||
default:
|
default:
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
return {};
|
return {};
|
||||||
@ -2495,7 +2582,9 @@ private:
|
|||||||
const ShaderIR& ir;
|
const ShaderIR& ir;
|
||||||
const ShaderType stage;
|
const ShaderType stage;
|
||||||
const Tegra::Shader::Header header;
|
const Tegra::Shader::Header header;
|
||||||
|
const Registry& registry;
|
||||||
const Specialization& specialization;
|
const Specialization& specialization;
|
||||||
|
std::unordered_map<u8, VaryingTFB> transform_feedback;
|
||||||
|
|
||||||
const Id t_void = Name(TypeVoid(), "void");
|
const Id t_void = Name(TypeVoid(), "void");
|
||||||
|
|
||||||
@ -2584,7 +2673,7 @@ private:
|
|||||||
Id shared_memory{};
|
Id shared_memory{};
|
||||||
std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
|
std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
|
||||||
std::map<Attribute::Index, Id> input_attributes;
|
std::map<Attribute::Index, Id> input_attributes;
|
||||||
std::map<Attribute::Index, Id> output_attributes;
|
std::unordered_map<u8, GenericVaryingDescription> output_attributes;
|
||||||
std::map<u32, Id> constant_buffers;
|
std::map<u32, Id> constant_buffers;
|
||||||
std::map<GlobalMemoryBase, Id> global_buffers;
|
std::map<GlobalMemoryBase, Id> global_buffers;
|
||||||
std::map<u32, TexelBuffer> texel_buffers;
|
std::map<u32, TexelBuffer> texel_buffers;
|
||||||
@ -2870,8 +2959,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
|
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||||
ShaderType stage, const Specialization& specialization) {
|
ShaderType stage, const VideoCommon::Shader::Registry& registry,
|
||||||
return SPIRVDecompiler(device, ir, stage, specialization).Assemble();
|
const Specialization& specialization) {
|
||||||
|
return SPIRVDecompiler(device, ir, stage, registry, specialization).Assemble();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
|
#include "video_core/shader/registry.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
@ -91,17 +92,9 @@ struct Specialization final {
|
|||||||
u32 shared_memory_size{};
|
u32 shared_memory_size{};
|
||||||
|
|
||||||
// Graphics specific
|
// Graphics specific
|
||||||
Maxwell::PrimitiveTopology primitive_topology{};
|
|
||||||
std::optional<float> point_size{};
|
std::optional<float> point_size{};
|
||||||
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
|
std::array<Maxwell::VertexAttribute::Type, Maxwell::NumVertexAttributes> attribute_types{};
|
||||||
bool ndc_minus_one_to_one{};
|
bool ndc_minus_one_to_one{};
|
||||||
|
|
||||||
// Tessellation specific
|
|
||||||
struct {
|
|
||||||
Maxwell::TessellationPrimitive primitive{};
|
|
||||||
Maxwell::TessellationSpacing spacing{};
|
|
||||||
bool clockwise{};
|
|
||||||
} tessellation;
|
|
||||||
};
|
};
|
||||||
// Old gcc versions don't consider this trivially copyable.
|
// Old gcc versions don't consider this trivially copyable.
|
||||||
// static_assert(std::is_trivially_copyable_v<Specialization>);
|
// static_assert(std::is_trivially_copyable_v<Specialization>);
|
||||||
@ -114,6 +107,8 @@ struct SPIRVShader {
|
|||||||
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
|
ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir);
|
||||||
|
|
||||||
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
|
std::vector<u32> Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
|
||||||
Tegra::Engines::ShaderType stage, const Specialization& specialization);
|
Tegra::Engines::ShaderType stage,
|
||||||
|
const VideoCommon::Shader::Registry& registry,
|
||||||
|
const Specialization& specialization);
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
115
src/video_core/shader/transform_feedback.cpp
Normal file
115
src/video_core/shader/transform_feedback.cpp
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
|
#include "video_core/shader/registry.h"
|
||||||
|
#include "video_core/shader/transform_feedback.h"
|
||||||
|
|
||||||
|
namespace VideoCommon::Shader {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
|
||||||
|
// TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
|
||||||
|
|
||||||
|
/// Attribute offsets that describe a vector
|
||||||
|
constexpr std::array VECTORS = {
|
||||||
|
28, // gl_Position
|
||||||
|
32, // Generic 0
|
||||||
|
36, // Generic 1
|
||||||
|
40, // Generic 2
|
||||||
|
44, // Generic 3
|
||||||
|
48, // Generic 4
|
||||||
|
52, // Generic 5
|
||||||
|
56, // Generic 6
|
||||||
|
60, // Generic 7
|
||||||
|
64, // Generic 8
|
||||||
|
68, // Generic 9
|
||||||
|
72, // Generic 10
|
||||||
|
76, // Generic 11
|
||||||
|
80, // Generic 12
|
||||||
|
84, // Generic 13
|
||||||
|
88, // Generic 14
|
||||||
|
92, // Generic 15
|
||||||
|
96, // Generic 16
|
||||||
|
100, // Generic 17
|
||||||
|
104, // Generic 18
|
||||||
|
108, // Generic 19
|
||||||
|
112, // Generic 20
|
||||||
|
116, // Generic 21
|
||||||
|
120, // Generic 22
|
||||||
|
124, // Generic 23
|
||||||
|
128, // Generic 24
|
||||||
|
132, // Generic 25
|
||||||
|
136, // Generic 26
|
||||||
|
140, // Generic 27
|
||||||
|
144, // Generic 28
|
||||||
|
148, // Generic 29
|
||||||
|
152, // Generic 30
|
||||||
|
156, // Generic 31
|
||||||
|
160, // gl_FrontColor
|
||||||
|
164, // gl_FrontSecondaryColor
|
||||||
|
160, // gl_BackColor
|
||||||
|
164, // gl_BackSecondaryColor
|
||||||
|
192, // gl_TexCoord[0]
|
||||||
|
196, // gl_TexCoord[1]
|
||||||
|
200, // gl_TexCoord[2]
|
||||||
|
204, // gl_TexCoord[3]
|
||||||
|
208, // gl_TexCoord[4]
|
||||||
|
212, // gl_TexCoord[5]
|
||||||
|
216, // gl_TexCoord[6]
|
||||||
|
220, // gl_TexCoord[7]
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
|
||||||
|
|
||||||
|
std::unordered_map<u8, VaryingTFB> tfb;
|
||||||
|
|
||||||
|
for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
|
||||||
|
const auto& locations = info.tfb_varying_locs[buffer];
|
||||||
|
const auto& layout = info.tfb_layouts[buffer];
|
||||||
|
const std::size_t varying_count = layout.varying_count;
|
||||||
|
|
||||||
|
std::size_t highest = 0;
|
||||||
|
|
||||||
|
for (std::size_t offset = 0; offset < varying_count; ++offset) {
|
||||||
|
const std::size_t base_offset = offset;
|
||||||
|
const u8 location = locations[offset];
|
||||||
|
|
||||||
|
VaryingTFB varying;
|
||||||
|
varying.buffer = layout.stream;
|
||||||
|
varying.stride = layout.stride;
|
||||||
|
varying.offset = offset * sizeof(u32);
|
||||||
|
varying.components = 1;
|
||||||
|
|
||||||
|
if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
|
||||||
|
UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
|
||||||
|
|
||||||
|
const u8 base_index = location / 4;
|
||||||
|
while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
|
||||||
|
++offset;
|
||||||
|
++varying.components;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
|
||||||
|
UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
|
||||||
|
|
||||||
|
highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
|
||||||
|
}
|
||||||
|
|
||||||
|
UNIMPLEMENTED_IF(highest != layout.stride);
|
||||||
|
}
|
||||||
|
return tfb;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace VideoCommon::Shader
|
23
src/video_core/shader/transform_feedback.h
Normal file
23
src/video_core/shader/transform_feedback.h
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/shader/registry.h"
|
||||||
|
|
||||||
|
namespace VideoCommon::Shader {
|
||||||
|
|
||||||
|
struct VaryingTFB {
|
||||||
|
std::size_t buffer;
|
||||||
|
std::size_t stride;
|
||||||
|
std::size_t offset;
|
||||||
|
std::size_t components;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
|
||||||
|
|
||||||
|
} // namespace VideoCommon::Shader
|
Loading…
Reference in New Issue
Block a user