mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2026-01-12 00:06:51 +08:00
swscale/x86/ops_float: store and load per row dither offset directly
Instead of computing y + N with a hard-coded index offset, calculate each row's relative offset as a 16-bit integer in C and add it to the pointer directly. Since we no longer mask the resulting combined address, this may result in an overread, but that's fine since we over-provisioned the array in the previous commit.
This commit is contained in:
@@ -215,6 +215,13 @@ static int setup_dither(const SwsOp *op, SwsOpPriv *out)
|
||||
|
||||
memcpy(&matrix[size * size], matrix, max_offset * stride);
|
||||
|
||||
/* Store relative pointer offset to each row inside extra space */
|
||||
static_assert(sizeof(out->ptr) <= sizeof(uint16_t[4]), ">8 byte pointers not supported");
|
||||
assert(max_offset * stride <= UINT16_MAX);
|
||||
uint16_t *offset = &out->u16[4];
|
||||
for (int i = 0; i < 4; i++)
|
||||
offset[i] = (op->dither.y_offset[i] & (size - 1)) * stride;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -245,6 +245,9 @@ endstruc
|
||||
%define tmp0d r4d
|
||||
%define tmp1d r5d
|
||||
|
||||
%define tmp0w r4w
|
||||
%define tmp1w r5w
|
||||
|
||||
; Registers for plane pointers; put at the end (and in ascending plane order)
|
||||
; so that we can avoid reserving them when not necessary
|
||||
%define out0q r6q
|
||||
|
||||
@@ -179,10 +179,8 @@ IF W, mulps mw2, m8
|
||||
CONTINUE tmp0q
|
||||
%endmacro
|
||||
|
||||
%macro load_dither_row 5 ; size_log2, y, addr, out, out2
|
||||
lea tmp0q, %2
|
||||
and tmp0q, (1 << %1) - 1
|
||||
shl tmp0q, %1+2
|
||||
%macro load_dither_row 5 ; size_log2, comp_idx, addr, out, out2
|
||||
mov tmp0w, [implq + SwsOpImpl.priv + (4 + %2) * 2] ; priv.u16[4 + i]
|
||||
%if %1 == 1
|
||||
vbroadcastsd %4, [%3 + tmp0q]
|
||||
%elif %1 == 2
|
||||
@@ -225,6 +223,11 @@ op dither%1
|
||||
%endif
|
||||
; dither matrix is stored indirectly at the private data address
|
||||
mov tmp1q, [implq + SwsOpImpl.priv]
|
||||
; add y offset
|
||||
mov tmp0d, yd
|
||||
and tmp0d, (1 << %1) - 1
|
||||
shl tmp0d, %1 + 2 ; * sizeof(float)
|
||||
add tmp1q, tmp0q
|
||||
%if (4 << %1) > 2 * mmsize
|
||||
; need to add in x offset
|
||||
mov tmp0d, bxd
|
||||
@@ -232,10 +235,10 @@ op dither%1
|
||||
and tmp0d, (4 << %1) - 1
|
||||
add tmp1q, tmp0q
|
||||
%endif
|
||||
IF X, load_dither_row %1, [yd + 0], tmp1q, DX, DX2
|
||||
IF Y, load_dither_row %1, [yd + 3], tmp1q, DY, DY2
|
||||
IF Z, load_dither_row %1, [yd + 2], tmp1q, DZ, DZ2
|
||||
IF W, load_dither_row %1, [yd + 5], tmp1q, DW, DW2
|
||||
IF X, load_dither_row %1, 0, tmp1q, DX, DX2
|
||||
IF Y, load_dither_row %1, 1, tmp1q, DY, DY2
|
||||
IF Z, load_dither_row %1, 2, tmp1q, DZ, DZ2
|
||||
IF W, load_dither_row %1, 3, tmp1q, DW, DW2
|
||||
%endif
|
||||
LOAD_CONT tmp0q
|
||||
IF X, addps mx, DX
|
||||
|
||||
Reference in New Issue
Block a user