astc_decoder: Combine FastReplicate functions to work around new NV driver bug
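The new Nvidia drivers have a bug where the FastReplicateTo6 function ends up performing its lookup in the REPLICATE_*_BIT_TO_8 tables rather than the REPLICATE_*_BIT_TO_6 tables. This seems to be a driver optimization gone wrong. Combining the lookup logic of FastReplicateTo6 and FastReplicateTo8 into a single FastReplicate function that takes the target bit width (to_bit) as a parameter seems to work around the bug; both original functions are kept as thin wrappers around it.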
This commit is contained in:
parent
480b03b645
commit
a5bff8e9b3
@ -155,9 +155,6 @@ uint SwizzleOffset(uvec2 pos) {
|
||||
// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)]
|
||||
// is the same as [(num_bits - 1):0] and repeats all the way down.
|
||||
uint Replicate(uint val, uint num_bits, uint to_bit) {
|
||||
if (num_bits == 0 || to_bit == 0) {
|
||||
return 0;
|
||||
}
|
||||
const uint v = val & uint((1 << num_bits) - 1);
|
||||
uint res = v;
|
||||
uint reslen = num_bits;
|
||||
@ -187,42 +184,57 @@ uint ReplicateBitTo9(uint value) {
|
||||
return REPLICATE_1_BIT_TO_9_TABLE[value];
|
||||
}
|
||||
|
||||
uint FastReplicateTo8(uint value, uint num_bits) {
|
||||
switch (num_bits) {
|
||||
case 1:
|
||||
return REPLICATE_1_BIT_TO_8_TABLE[value];
|
||||
case 2:
|
||||
return REPLICATE_2_BIT_TO_8_TABLE[value];
|
||||
case 3:
|
||||
return REPLICATE_3_BIT_TO_8_TABLE[value];
|
||||
case 4:
|
||||
return REPLICATE_4_BIT_TO_8_TABLE[value];
|
||||
case 5:
|
||||
return REPLICATE_5_BIT_TO_8_TABLE[value];
|
||||
case 6:
|
||||
return REPLICATE_6_BIT_TO_8_TABLE[value];
|
||||
case 7:
|
||||
return REPLICATE_7_BIT_TO_8_TABLE[value];
|
||||
case 8:
|
||||
uint FastReplicate(uint value, uint num_bits, uint to_bit) {
|
||||
if (num_bits == 0) {
|
||||
return 0;
|
||||
}
|
||||
if (num_bits == to_bit) {
|
||||
return value;
|
||||
}
|
||||
return Replicate(value, num_bits, 8);
|
||||
if (to_bit == 6) {
|
||||
switch (num_bits) {
|
||||
case 1:
|
||||
return REPLICATE_1_BIT_TO_6_TABLE[value];
|
||||
case 2:
|
||||
return REPLICATE_2_BIT_TO_6_TABLE[value];
|
||||
case 3:
|
||||
return REPLICATE_3_BIT_TO_6_TABLE[value];
|
||||
case 4:
|
||||
return REPLICATE_4_BIT_TO_6_TABLE[value];
|
||||
case 5:
|
||||
return REPLICATE_5_BIT_TO_6_TABLE[value];
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else { /* if (to_bit == 8) */
|
||||
switch (num_bits) {
|
||||
case 1:
|
||||
return REPLICATE_1_BIT_TO_8_TABLE[value];
|
||||
case 2:
|
||||
return REPLICATE_2_BIT_TO_8_TABLE[value];
|
||||
case 3:
|
||||
return REPLICATE_3_BIT_TO_8_TABLE[value];
|
||||
case 4:
|
||||
return REPLICATE_4_BIT_TO_8_TABLE[value];
|
||||
case 5:
|
||||
return REPLICATE_5_BIT_TO_8_TABLE[value];
|
||||
case 6:
|
||||
return REPLICATE_6_BIT_TO_8_TABLE[value];
|
||||
case 7:
|
||||
return REPLICATE_7_BIT_TO_8_TABLE[value];
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return Replicate(value, num_bits, to_bit);
|
||||
}
|
||||
|
||||
// Convenience wrapper: forwards value and num_bits to FastReplicate with to_bit fixed at 8.
uint FastReplicateTo8(uint value, uint num_bits) {
|
||||
return FastReplicate(value, num_bits, 8);
|
||||
}
|
||||
|
||||
uint FastReplicateTo6(uint value, uint num_bits) {
|
||||
switch (num_bits) {
|
||||
case 1:
|
||||
return REPLICATE_1_BIT_TO_6_TABLE[value];
|
||||
case 2:
|
||||
return REPLICATE_2_BIT_TO_6_TABLE[value];
|
||||
case 3:
|
||||
return REPLICATE_3_BIT_TO_6_TABLE[value];
|
||||
case 4:
|
||||
return REPLICATE_4_BIT_TO_6_TABLE[value];
|
||||
case 5:
|
||||
return REPLICATE_5_BIT_TO_6_TABLE[value];
|
||||
}
|
||||
return Replicate(value, num_bits, 6);
|
||||
return FastReplicate(value, num_bits, 6);
|
||||
}
|
||||
|
||||
uint Div3Floor(uint v) {
|
||||
|
Loading…
Reference in New Issue
Block a user