Texture Cache: Add HLE methods for building 3D textures within the GPU in certain scenarios.
This commit adds a series of HLE methods for handling 3D textures in general. This helps games that generate 3D textures on every frame and may reduce loading times for certain games.
This commit is contained in:
parent
aea978e037
commit
51c9e98677
@ -392,4 +392,42 @@ std::string SurfaceParams::TargetName() const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the size of one block of this texture in guest memory layout.
/// A block covers (64 << block_width) bytes along x, (8 << block_height) rows along y
/// and (1 << block_depth) slices along z; the result is the product of the three.
u32 SurfaceParams::GetBlockSize() const {
    return (64U << block_width) * (8U << block_height) * (1U << block_depth);
}
|
||||||
|
|
||||||
|
std::pair<u32, u32> SurfaceParams::GetBlockXY() const {
|
||||||
|
const u32 x_pixels = 64U / GetBytesPerPixel();
|
||||||
|
const u32 x = x_pixels << block_width;
|
||||||
|
const u32 y = 8U << block_height;
|
||||||
|
return {x, y};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Converts a memory offset inside this surface into {x, y, z} coordinates.
///
/// The offset is first split into the index of the block that contains it and the index
/// of the GOB inside that block. The block index is then decomposed into block
/// coordinates (x fastest, then y, then z) and scaled by the block extent on each axis.
/// The GOB index contributes to y and z only; x always points at the start of the block.
///
/// @param offset Offset from the start of the surface.
/// @return Tuple of {x, y, z}.
std::tuple<u32, u32, u32> SurfaceParams::GetBlockOffsetXYZ(u32 offset) const {
    const auto ceil_div = [](const u32 value, const u32 divisor) {
        return (value + divisor - 1) / divisor;
    };

    // Locate the block holding the offset and the GOB within that block.
    const u32 bytes_per_block = GetBlockSize();
    const u32 block = offset / bytes_per_block;
    const u32 offset_in_block = offset % bytes_per_block;
    const u32 gob = offset_in_block / static_cast<u32>(Tegra::Texture::GetGOBSize());

    // Extent of a single block along each axis.
    const u32 gob_extent_x = 64U / GetBytesPerPixel();
    const u32 block_extent_x = gob_extent_x << block_width;
    const u32 block_extent_y = 8U << block_height;
    const u32 block_extent_z = 1U << block_depth;

    // Number of blocks needed to cover the surface along each axis.
    const u32 blocks_x = ceil_div(width, block_extent_x);
    const u32 blocks_y = ceil_div(height, block_extent_y);
    const u32 blocks_z = ceil_div(depth, block_extent_z);

    // Block origin plus the displacement contributed by the GOB index.
    const u32 x = (block % blocks_x) * block_extent_x;
    const u32 y = ((block / blocks_x) % blocks_y) * block_extent_y + (gob * 8U) % block_extent_y;
    const u32 z =
        ((block / (blocks_x * blocks_y)) % blocks_z) * block_extent_z + (gob >> block_height);
    return {x, y, z};
}
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
@ -4,6 +4,8 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/bit_util.h"
|
#include "common/bit_util.h"
|
||||||
#include "common/cityhash.h"
|
#include "common/cityhash.h"
|
||||||
@ -136,6 +138,15 @@ public:
|
|||||||
|
|
||||||
std::size_t GetConvertedMipmapSize(u32 level) const;
|
std::size_t GetConvertedMipmapSize(u32 level) const;
|
||||||
|
|
||||||
|
// Returns the size in bytes of one Tegra block of this texture in guest memory layout
|
||||||
|
u32 GetBlockSize() const;
|
||||||
|
|
||||||
|
// Returns the X and Y dimensions of a block, in pixels
|
||||||
|
std::pair<u32, u32> GetBlockXY() const;
|
||||||
|
|
||||||
|
// Get the offset in x, y, z coordinates from a memory offset
|
||||||
|
std::tuple<u32, u32, u32> GetBlockOffsetXYZ(u32 offset) const;
|
||||||
|
|
||||||
/// Returns the size of a layer in bytes in guest memory.
|
/// Returns the size of a layer in bytes in guest memory.
|
||||||
std::size_t GetGuestLayerSize() const {
|
std::size_t GetGuestLayerSize() const {
|
||||||
return GetLayerSize(false, false);
|
return GetLayerSize(false, false);
|
||||||
@ -269,7 +280,8 @@ private:
|
|||||||
|
|
||||||
/// Returns the size of all mipmap levels and aligns as needed.
|
/// Returns the size of all mipmap levels and aligns as needed.
|
||||||
std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
|
std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const {
|
||||||
return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth);
|
return GetLayerSize(as_host_size, uncompressed) *
|
||||||
|
(layer_only ? 1U : (is_layered ? depth : 1U));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the size of a layer
|
/// Returns the size of a layer
|
||||||
|
@ -615,6 +615,85 @@ private:
|
|||||||
return {{new_surface, new_surface->GetMainView()}};
|
return {{new_surface, new_surface->GetMainView()}};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes care of managing 3D textures and its slices. Does some HLE methods when possible.
|
||||||
|
* Fallsback to LLE when it isn't possible.
|
||||||
|
*
|
||||||
|
* @param overlaps The overlapping surfaces registered in the cache.
|
||||||
|
* @param params The parameters on the new surface.
|
||||||
|
* @param gpu_addr The starting address of the new surface.
|
||||||
|
* @param cache_addr The starting address of the new surface on physical memory.
|
||||||
|
* @param preserve_contents Indicates that the new surface should be loaded from memory or
|
||||||
|
* left blank.
|
||||||
|
*/
|
||||||
|
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps,
|
||||||
|
const SurfaceParams& params,
|
||||||
|
const GPUVAddr gpu_addr,
|
||||||
|
const CacheAddr cache_addr,
|
||||||
|
bool preserve_contents) {
|
||||||
|
if (params.target == SurfaceTarget::Texture3D) {
|
||||||
|
bool failed = false;
|
||||||
|
if (params.num_levels > 1) {
|
||||||
|
// We can't handle mipmaps in 3D textures yet, better fallback to LLE approach
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
TSurface new_surface = GetUncachedSurface(gpu_addr, params);
|
||||||
|
bool modified = false;
|
||||||
|
for (auto& surface : overlaps) {
|
||||||
|
const SurfaceParams& src_params = surface->GetSurfaceParams();
|
||||||
|
if (src_params.target != SurfaceTarget::Texture2D) {
|
||||||
|
failed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (src_params.height != params.height) {
|
||||||
|
failed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (src_params.block_depth != params.block_depth ||
|
||||||
|
src_params.block_height != params.block_height) {
|
||||||
|
failed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr);
|
||||||
|
const auto [x, y, z] = params.GetBlockOffsetXYZ(offset);
|
||||||
|
modified |= surface->IsModified();
|
||||||
|
const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height,
|
||||||
|
1);
|
||||||
|
ImageCopy(surface, new_surface, copy_params);
|
||||||
|
}
|
||||||
|
if (failed) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
for (const auto& surface : overlaps) {
|
||||||
|
Unregister(surface);
|
||||||
|
}
|
||||||
|
new_surface->MarkAsModified(modified, Tick());
|
||||||
|
Register(new_surface);
|
||||||
|
return {{new_surface, new_surface->GetMainView()}};
|
||||||
|
} else {
|
||||||
|
for (const auto& surface : overlaps) {
|
||||||
|
if (!surface->MatchTarget(params.target)) {
|
||||||
|
if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) {
|
||||||
|
if (Settings::values.use_accurate_gpu_emulation) {
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
Unregister(surface);
|
||||||
|
return InitializeSurface(gpu_addr, params, preserve_contents);
|
||||||
|
}
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
if (surface->GetCacheAddr() != cache_addr) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const auto struct_result = surface->MatchesStructure(params);
|
||||||
|
if (struct_result == MatchStructureResult::FullMatch) {
|
||||||
|
return {{surface, surface->GetMainView()}};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return InitializeSurface(gpu_addr, params, preserve_contents);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the starting address and parameters of a candidate surface and tries
|
* Gets the starting address and parameters of a candidate surface and tries
|
||||||
* to find a matching surface within the cache. This is done in 3 big steps:
|
* to find a matching surface within the cache. This is done in 3 big steps:
|
||||||
@ -687,6 +766,15 @@ private:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Look if it's a 3D texture
|
||||||
|
if (params.block_depth > 0) {
|
||||||
|
std::optional<std::pair<TSurface, TView>> surface =
|
||||||
|
Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents);
|
||||||
|
if (surface) {
|
||||||
|
return *surface;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Split cases between 1 overlap or many.
|
// Split cases between 1 overlap or many.
|
||||||
if (overlaps.size() == 1) {
|
if (overlaps.size() == 1) {
|
||||||
TSurface current_surface = overlaps[0];
|
TSurface current_surface = overlaps[0];
|
||||||
|
@ -12,6 +12,10 @@ namespace Tegra::Texture {
|
|||||||
|
|
||||||
// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents
|
// GOBSize constant. Calculated by 64 bytes in x multiplied by 8 y coords, represents
|
||||||
// a small rect of (64/bytes_per_pixel)X8.
|
// a small rect of (64/bytes_per_pixel)X8.
|
||||||
|
/// Returns the GOB size: 64 bytes in x multiplied by 8 rows in y.
inline std::size_t GetGOBSize() {
    constexpr std::size_t gob_bytes_x = 64;
    constexpr std::size_t gob_rows_y = 8;
    return gob_bytes_x * gob_rows_y;
}
|
||||||
|
|
||||||
inline std::size_t GetGOBSizeShift() {
|
inline std::size_t GetGOBSizeShift() {
|
||||||
return 9;
|
return 9;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user