swscale/optimizer: add packed shuffle solver

This can turn any compatible sequence of operations into a single packed
shuffle, including packed swizzling, grayscale->RGB conversion, endianness
swapping, RGB bit depth conversions, rgb24->rgb0 alpha clearing and more.
This commit is contained in:
Niklas Haas
2025-05-21 13:49:37 +02:00
parent db2bc11a97
commit 696f3be322
2 changed files with 123 additions and 0 deletions

View File

@@ -130,4 +130,32 @@ int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
*/
int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out);
/**
 * "Solve" an op list into a fixed shuffle mask, with an optional ability to
 * also directly clear the output value (for e.g. rgb24 -> rgb0). This can
 * accept any operation chain that only consists of the following operations:
 *
 * - SWS_OP_READ (packed or single-component, non-fractional)
 * - SWS_OP_SWIZZLE
 * - SWS_OP_SWAP_BYTES
 * - SWS_OP_CLEAR to zero (when clear_val is specified)
 * - SWS_OP_CONVERT (integer expand)
 * - SWS_OP_WRITE (packed or single-component, non-fractional)
 *
 * Basically, any operation that purely consists of moving around and reordering
 * bytes within a single plane, can be turned into a shuffle mask.
 *
 * The list must begin with SWS_OP_READ. Solving stops at the first
 * SWS_OP_WRITE; any operations following it are not examined.
 *
 * Each entry of the resulting mask is the byte offset, relative to the start
 * of the input consumed by one iteration, of the byte to place at that output
 * position. Entries that correspond to cleared components, as well as trailing
 * entries not covered by a whole pixel group, are set to clear_val.
 *
 * @param ops         The operation list to decompose.
 * @param shuffle     The output shuffle mask; all `size` bytes are overwritten
 *                    once a supported SWS_OP_WRITE is reached.
 * @param size        The size (in bytes) of the output shuffle mask.
 * @param clear_val   If nonzero, this index will be used to clear the output.
 *                    Mask bytes equal to this value are treated as "cleared",
 *                    so it should lie outside the range of valid input byte
 *                    offsets. If zero, SWS_OP_CLEAR is rejected.
 * @param read_bytes  Returns the number of bytes read per shuffle iteration.
 *                    Only written on success.
 * @param write_bytes Returns the number of bytes written per shuffle iteration.
 *                    Only written on success.
 *
 * @return The number of pixels processed per iteration, or a negative error
 *         code; in particular AVERROR(ENOTSUP) for unsupported operations,
 *         and AVERROR(EINVAL) if the list is empty, does not start with
 *         SWS_OP_READ, or contains no SWS_OP_WRITE.
 */
int ff_sws_solve_shuffle(const SwsOpList *ops, uint8_t shuffle[], int size,
uint8_t clear_val, int *read_bytes, int *write_bytes);
#endif /* SWSCALE_OPS_INTERNAL_H */

View File

@@ -19,6 +19,7 @@
*/
#include "libavutil/avassert.h"
#include <libavutil/bswap.h>
#include "libavutil/rational.h"
#include "ops.h"
@@ -769,3 +770,97 @@ retry:
return 0;
}
/**
 * Reduce a read -> (swizzle / byte swap / clear / expand)* -> write chain to a
 * single byte shuffle mask.
 *
 * For each of the (up to four) components, mask[i] holds four input byte
 * offsets within one pixel, one per byte of the component: bits 8*b..8*b+7
 * give the source offset for the component's b-th byte. Every supported
 * operation is applied to these offsets instead of to pixel data, and the
 * terminating write replicates the resulting pattern for as many whole pixels
 * as fit into `size` bytes.
 *
 * @param ops         Operation list; must start with SWS_OP_READ.
 * @param shuffle     Output mask of `size` bytes. Only touched once a
 *                    supported SWS_OP_WRITE has been reached.
 * @param size        Size of `shuffle` in bytes.
 * @param clear_val   Mask value meaning "clear this byte". Zero disables
 *                    clearing, in which case SWS_OP_CLEAR is rejected.
 * @param read_bytes  Set to the number of input bytes consumed per iteration
 *                    (success only).
 * @param write_bytes Set to the number of output bytes produced per iteration
 *                    (success only).
 *
 * @return Number of pixels handled per iteration, AVERROR(ENOTSUP) for an
 *         operation that cannot be expressed as a shuffle, or AVERROR(EINVAL)
 *         for a malformed list (empty, not starting with a read, missing a
 *         write, or with zero-sized pixels).
 */
int ff_sws_solve_shuffle(const SwsOpList *const ops, uint8_t shuffle[],
                         int size, uint8_t clear_val,
                         int *read_bytes, int *write_bytes)
{
    uint32_t mask[4] = {0};

    /* Reject empty lists before touching ops->ops[0] */
    if (!ops->num_ops)
        return AVERROR(EINVAL);

    const SwsOp read = ops->ops[0];
    if (read.op != SWS_OP_READ)
        return AVERROR(EINVAL);
    if (read.rw.frac || (!read.rw.packed && read.rw.elems > 1))
        return AVERROR(ENOTSUP);

    /* Identity mapping: byte b of component i comes from i * read_size + b */
    const int read_size = ff_sws_pixel_type_size(read.type);
    for (int i = 0; i < read.rw.elems; i++)
        mask[i] = 0x01010101 * i * read_size + 0x03020100;

    for (int opidx = 1; opidx < ops->num_ops; opidx++) {
        const SwsOp *op = &ops->ops[opidx];
        switch (op->op) {
        case SWS_OP_SWIZZLE: {
            /* Permute from a copy, since the permutation may alias */
            uint32_t orig[4] = { mask[0], mask[1], mask[2], mask[3] };
            for (int i = 0; i < 4; i++)
                mask[i] = orig[op->swizzle.in[i]];
            break;
        }

        case SWS_OP_SWAP_BYTES:
            /* Swapping the offsets swaps the bytes they will fetch */
            for (int i = 0; i < 4; i++) {
                switch (ff_sws_pixel_type_size(op->type)) {
                case 2: mask[i] = av_bswap16(mask[i]); break;
                case 4: mask[i] = av_bswap32(mask[i]); break;
                }
            }
            break;

        case SWS_OP_CLEAR:
            for (int i = 0; i < 4; i++) {
                if (!op->c.q4[i].den)
                    continue; /* component is not cleared */
                /* Only clearing to zero is expressible, and only if the
                 * caller provided a clear index */
                if (op->c.q4[i].num != 0 || !clear_val)
                    return AVERROR(ENOTSUP);
                mask[i] = 0x1010101ul * clear_val;
            }
            break;

        case SWS_OP_CONVERT: {
            if (!op->convert.expand)
                return AVERROR(ENOTSUP);
            /* Expansion repeats the source bytes across the wider value */
            for (int i = 0; i < 4; i++) {
                switch (ff_sws_pixel_type_size(op->type)) {
                case 1: mask[i] = 0x01010101 * (mask[i] & 0xFF);   break;
                case 2: mask[i] = 0x00010001 * (mask[i] & 0xFFFF); break;
                }
            }
            break;
        }

        case SWS_OP_WRITE: {
            if (op->rw.frac || (!op->rw.packed && op->rw.elems > 1))
                return AVERROR(ENOTSUP);

            const int write_size  = ff_sws_pixel_type_size(op->type);
            const int read_chunk  = read.rw.elems * read_size;
            const int write_chunk = op->rw.elems * write_size;
            const int chunk       = FFMAX(read_chunk, write_chunk);

            /* Guard the division below against zero-sized pixels */
            if (chunk <= 0)
                return AVERROR(EINVAL);

            /* Pre-fill with clear_val, so that cleared bytes and bytes
             * beyond the last whole pixel keep that value */
            memset(shuffle, clear_val, size);

            const int num_groups = size / chunk;
            for (int n = 0; n < num_groups; n++) {
                const int base_in  = n * read_chunk;
                const int base_out = n * write_chunk;
                for (int i = 0; i < op->rw.elems; i++) {
                    const int offset = base_out + i * write_size;
                    for (int b = 0; b < write_size; b++) {
                        const uint8_t idx = mask[i] >> (b * 8);
                        /* A zero clear_val means "no clearing", so source
                         * offset 0 must not be mistaken for a cleared byte;
                         * otherwise every group after the first would fetch
                         * byte 0 instead of base_in */
                        if (!clear_val || idx != clear_val)
                            shuffle[offset + b] = base_in + idx;
                    }
                }
            }

            *read_bytes  = num_groups * read_chunk;
            *write_bytes = num_groups * write_chunk;
            return num_groups;
        }

        default:
            return AVERROR(ENOTSUP);
        }
    }

    /* Ran off the end of the list without encountering a write */
    return AVERROR(EINVAL);
}