mirror of
https://github.com/FFmpeg/FFmpeg.git
synced 2026-01-12 00:06:51 +08:00
359 lines
10 KiB
Plaintext
359 lines
10 KiB
Plaintext
/*
|
|
* FFv1 codec
|
|
*
|
|
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
|
*
|
|
* This file is part of FFmpeg.
|
|
*
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#ifndef GOLOMB
|
|
#ifdef CACHED_SYMBOL_READER
/* Workgroup-shared copy of one context's state bytes. encode_line() loads it
 * before coding a symbol and stores it back afterwards. */
shared uint8_t state[CONTEXT_SIZE];

/* Code one binary decision `bit` using entry `idx` of the shared state copy. */
#define WRITE(rc, idx, bit) put_rac_direct(rc, state[idx], bit)
#else
/* Code one binary decision `bit` using the state byte located at
 * slice_state + state_off + idx.
 * NOTE: expands to a reference to `state_off`, which therefore has to be a
 * variable in scope wherever the macro is used. */
#define WRITE(rc, idx, bit) put_rac(rc, uint64_t(slice_state) + (state_off + idx), bit)
#endif
|
|
|
|
/*
 * Range-code one integer. Note - only handles signed values.
 *
 * Emitted decisions, in order:
 *   - a zero flag (state 0); nothing else follows for v == 0
 *   - the index of the magnitude's highest set bit in unary (states 1..10)
 *   - the magnitude bits below that one, high to low (states 22..31)
 *   - the sign (states 11..21)
 *
 * c         - range coder, passed on to WRITE()
 * state_off - offset of this context's state bytes; only referenced by the
 *             non-cached WRITE() variant
 * v         - value to code
 */
void put_symbol(inout RangeCoder c, uint state_off, int v)
{
    WRITE(c, 0, v == 0);
    if (v == 0)
        return;

    const int mag = abs(v);
    const int top = findMSB(mag);

    /* `top` ones followed by a terminating zero. */
    int n = 0;
    while (n < top) {
        WRITE(c, 1 + min(n, 9), true);
        n++;
    }
    WRITE(c, 1 + min(top, 9), false);

    /* The top bit itself is implied by the unary prefix, so start below it. */
    for (int b = top - 1; b >= 0; b--)
        WRITE(c, 22 + min(b, 9), bitfieldExtract(mag, b, 1) != 0);

    WRITE(c, 22 - 11 + min(top, 10), v < 0);
}
|
|
|
|
/*
 * Write one line of samples without prediction: every sample is emitted as
 * `bits` individual bits, most significant first, through put_rac_equi().
 *
 * sc   - slice context (slice width and range coder are read from it)
 * img  - image the samples are loaded from
 * sp   - position the line coordinates are offset by
 * y    - line index
 * p    - plane index; planes 1 and 2 use the chroma-subsampled width (non-RGB)
 * comp - component of the loaded texel to encode
 * bits - number of bits written per sample
 */
void encode_line_pcm(inout SliceContext sc, readonly uimage2D img,
                     ivec2 sp, int y, int p, int comp, int bits)
{
#ifdef CACHED_SYMBOL_READER
    /* Only the first invocation of the workgroup writes to the coder. */
    if (gl_LocalInvocationID.x != 0)
        return;
#endif

    int line_w = sc.slice_dim.x;

#ifndef RGB
    if (p == 1 || p == 2) {
        line_w = ceil_rshift(line_w, chroma_shift.x);
        sp >>= chroma_shift;
    }
#endif

    for (int x = 0; x < line_w; x++) {
        const uint px = imageLoad(img, sp + LADDR(ivec2(x, y)))[comp];

        /* Counts b from bits - 1 down to 0. */
        int b = bits;
        while (b-- > 0)
            put_rac_equi(sc.c, bool(bitfieldExtract(px, b, 1)));
    }
}
|
|
|
|
/*
 * Range-coder variant: predict, fold and encode every sample of one line.
 *
 * sc              - slice context (slice width and range coder)
 * img             - image the samples are loaded from
 * state_off       - offset of this plane's context states within slice_state
 * sp              - position the line coordinates are offset by
 * y               - line index
 * p               - plane index; planes 1 and 2 use the subsampled width (non-RGB)
 * comp            - component of the loaded texel to encode
 * bits            - bit width handed to fold()
 * quant_table_idx - quantization table passed to get_pred()
 * run_index       - not used by this variant; it only mirrors the Golomb
 *                   variant's parameter list
 */
void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off,
                 ivec2 sp, int y, int p, int comp, int bits,
                 uint8_t quant_table_idx, const int run_index)
{
    int line_w = sc.slice_dim.x;

#ifndef RGB
    if (p == 1 || p == 2) {
        line_w = ceil_rshift(line_w, chroma_shift.x);
        sp >>= chroma_shift;
    }
#endif

    for (int x = 0; x < line_w; x++) {
        /* get_pred() result: .x is used as the context index, .y as the prediction. */
        ivec2 cd = get_pred(img, sp, ivec2(x, y), comp, line_w,
                            quant_table_idx, extend_lookup[quant_table_idx] > 0);
        cd.y = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - cd.y;

        /* A negative context is mirrored together with its residual. */
        if (cd.x < 0)
            cd = -cd;

        cd.y = fold(cd.y, bits);

        uint context_off = state_off + CONTEXT_SIZE*cd.x;

#ifdef CACHED_SYMBOL_READER
        /* Every invocation fetches one state byte of the context into the
         * shared copy, invocation 0 codes the symbol against that copy, and
         * every invocation then stores its byte back. */
        u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x);
        state[gl_LocalInvocationID.x] = sb.v;
        barrier();

        if (gl_LocalInvocationID.x == 0)
            put_symbol(sc.c, context_off, cd.y);

        barrier();
        sb.v = state[gl_LocalInvocationID.x];
#else
        put_symbol(sc.c, context_off, cd.y);
#endif
    }
}
|
|
|
|
#else /* GOLOMB */
|
|
|
|
/*
 * Golomb variant: predict, fold and encode every sample of one line with
 * VLC codes plus a run mode for stretches of zero residuals.
 *
 * sc              - slice context (slice width and bit writer)
 * img             - image the samples are loaded from
 * state_off       - offset of this plane's VLC states within slice_state
 * sp              - position the line coordinates are offset by
 * y               - line index
 * p               - plane index; planes 1 and 2 use the subsampled width (non-RGB)
 * comp            - component of the loaded texel to encode
 * bits            - bit width handed to fold() and get_vlc_symbol()
 * quant_table_idx - quantization table passed to get_pred()
 * run_index       - index into log2_run; updated in place so the caller can
 *                   carry it over to the next line
 */
void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off,
                 ivec2 sp, int y, int p, int comp, int bits,
                 uint8_t quant_table_idx, inout int run_index)
{
    int line_w = sc.slice_dim.x;

#ifndef RGB
    if (p == 1 || p == 2) {
        line_w = ceil_rshift(line_w, chroma_shift.x);
        sp >>= chroma_shift;
    }
#endif

    bool in_run  = false;
    int  pending = 0; /* zero residuals seen in the current run, not yet written */

    for (int x = 0; x < line_w; x++) {
        /* get_pred() result: .x is used as the context index, .y as the prediction. */
        ivec2 cd = get_pred(img, sp, ivec2(x, y), comp, line_w,
                            quant_table_idx, extend_lookup[quant_table_idx] > 0);
        cd.y = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - cd.y;

        if (cd.x < 0)
            cd = -cd;

        cd.y = fold(cd.y, bits);

        /* Context 0 switches run mode on; it stays on until a non-zero residual. */
        if (cd.x == 0)
            in_run = true;

        if (in_run) {
            if (cd.y == 0) {
                pending++;
                continue;
            }

            /* A very unlikely loop */
            while (true) {
                const int step = 1 << log2_run[run_index];
                if (pending < step)
                    break;
                pending -= step;
                run_index++;
                put_bits(sc.pb, 1, 1);
            }

            /* Remaining run length, one bit wider than the current run step. */
            put_bits(sc.pb, 1 + log2_run[run_index], pending);
            if (run_index != 0)
                run_index--;
            pending = 0;
            in_run  = false;

            /* The residual that ends a run is non-zero, so positive values
             * are shifted down by one before being coded. */
            if (cd.y > 0)
                cd.y--;
        }

        VlcState vs = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*cd.x);
        Symbol sym = get_vlc_symbol(vs, cd.y, bits);
        put_bits(sc.pb, sym.bits, sym.val);
    }

    /* The line ended inside a run: write out what is still pending. */
    if (in_run) {
        while (true) {
            const int step = 1 << log2_run[run_index];
            if (pending < step)
                break;
            pending -= step;
            run_index++;
            put_bits(sc.pb, 1, 1);
        }

        if (pending > 0)
            put_bits(sc.pb, 1, 1);
    }
}
|
|
#endif
|
|
|
|
#ifdef RGB
|
|
/*
 * Load the pixel at `pos` and return its components reordered through fmt_lut.
 *
 * The texel of src[0] supplies all four values; when planar_rgb is set,
 * components 1 .. (3 + transparency - 1) are replaced by the first channel
 * of src[1], src[2], ...
 */
ivec4 load_components(ivec2 pos)
{
    ivec4 px = ivec4(imageLoad(src[0], pos));

    if (planar_rgb != 0) {
        int c = 1;
        while (c < (3 + transparency)) {
            px[c] = int(imageLoad(src[c], pos)[0]);
            c++;
        }
    }

    ivec4 ordered;
    ordered[0] = px[fmt_lut[0]];
    ordered[1] = px[fmt_lut[1]];
    ordered[2] = px[fmt_lut[2]];
    ordered[3] = px[fmt_lut[3]];
    return ordered;
}
|
|
|
|
/*
 * Apply the colour transform to one pixel in place:
 *   r and b become their difference to g,
 *   g gains (r*rct_coef.x + b*rct_coef.y) >> 2 computed from those differences,
 *   r and b are then biased by rct_offset.
 * The fourth component is left untouched.
 */
void transform_sample(inout ivec4 pix, ivec2 rct_coef)
{
    pix.rb -= ivec2(pix.g);
    pix.g  += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;
    pix.rb += ivec2(rct_offset);
}
|
|
|
|
/*
 * Load line `y` of the slice, optionally colour-transform it, and store it
 * into `tmp`. The columns are split across the workgroup: each invocation
 * starts at its own local index and advances by the workgroup size.
 *
 * sc        - slice context (slice position and RCT coefficients)
 * sp        - position the destination coordinates in `tmp` are offset by
 * w         - number of pixels in the line
 * y         - line index within the slice
 * apply_rct - run transform_sample() on each pixel before storing it
 */
void preload_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
{
    uint x = gl_LocalInvocationID.x;
    while (x < w) {
        ivec4 px = load_components(sc.slice_pos + ivec2(x, y));

        if (expectEXT(apply_rct, true))
            transform_sample(px, sc.slice_rct_coef);

        imageStore(tmp, sp + LADDR(ivec2(x, y)), px);

        x += gl_WorkGroupSize.x;
    }
}
|
|
#endif
|
|
|
|
/*
 * Encode every line of every component of one slice.
 *
 * With the range coder (GOLOMB undefined) a slice whose slice_coding_mode is 1
 * is written as raw bits via encode_line_pcm(); in every other case the lines
 * go through encode_line().
 *
 * sc        - slice context to encode; its coder / bit writer is advanced
 * slice_idx - index of the slice, used to locate its context state storage
 */
void encode_slice(inout SliceContext sc, const uint slice_idx)
{
    ivec2 sp = sc.slice_pos;

#ifndef RGB
    int bits = bits_per_raw_sample;
#else
    /* Normally-coded samples of at most 8 bits use 9 bits. Deeper samples use
     * the raw depth plus one extra bit, which is dropped in PCM mode (1).
     * The depth has to be tested here: comparing the local `bits` (just set
     * to 9) against 8 is always true and made the default unreachable. */
    int bits = 9;
    if (bits_per_raw_sample > 8 || sc.slice_coding_mode != 0)
        bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);

    /* Lines are addressed relative to this workgroup row's offset
     * (presumably its region of the `tmp` line cache - confirm). */
    sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE;
#endif

#ifndef GOLOMB
    if (sc.slice_coding_mode == 1) {
#ifndef RGB
        for (int c = 0; c < components; c++) {
            /* Components 1 and 2 are vertically subsampled. */
            int h = sc.slice_dim.y;
            if (c > 0 && c < 3)
                h = ceil_rshift(h, chroma_shift.y);

            /* Takes into account dual-plane YUV formats */
            int p = min(c, planes - 1);
            int comp = c - p;

            for (int y = 0; y < h; y++)
                encode_line_pcm(sc, src[p], sp, y, p, comp, bits);
        }
#else
        for (int y = 0; y < sc.slice_dim.y; y++) {
            /* NOTE(review): preload_rgb() spreads its imageStore() calls over
             * the workgroup and the calls below read `tmp` back; no barrier is
             * visible in between - confirm visibility is guaranteed elsewhere. */
            preload_rgb(sc, sp, sc.slice_dim.x, y, false);

            /* Components are written in the order 1, 2, 0 (then 3 if present). */
            encode_line_pcm(sc, tmp, sp, y, 0, 1, bits);
            encode_line_pcm(sc, tmp, sp, y, 0, 2, bits);
            encode_line_pcm(sc, tmp, sp, y, 0, 0, bits);
            if (transparency == 1)
                encode_line_pcm(sc, tmp, sp, y, 0, 3, bits);
        }
#endif
    } else
#endif
    {
        /* Entries 1 and 2 share one quantization table and one state plane. */
        u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
        u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size;

#ifndef RGB
        for (int c = 0; c < components; c++) {
            /* The run index restarts for every component. */
            int run_index = 0;

            int h = sc.slice_dim.y;
            if (c > 0 && c < 3)
                h = ceil_rshift(h, chroma_shift.y);

            /* Takes into account dual-plane YUV formats */
            int p = min(c, planes - 1);
            int comp = c - p;

            for (int y = 0; y < h; y++)
                encode_line(sc, src[p], slice_state_off[c], sp, y, p,
                            comp, bits, quant_table_idx[c], run_index);
        }
#else
        /* A single run index is shared by all components and lines. */
        int run_index = 0;
        for (int y = 0; y < sc.slice_dim.y; y++) {
            /* NOTE(review): same store/load ordering question as in the PCM
             * path above - confirm `tmp` writes are visible to the readers. */
            preload_rgb(sc, sp, sc.slice_dim.x, y, true);

            encode_line(sc, tmp, slice_state_off[0],
                        sp, y, 0, 1, bits, quant_table_idx[0], run_index);
            encode_line(sc, tmp, slice_state_off[1],
                        sp, y, 0, 2, bits, quant_table_idx[1], run_index);
            encode_line(sc, tmp, slice_state_off[2],
                        sp, y, 0, 0, bits, quant_table_idx[2], run_index);
            if (transparency == 1)
                encode_line(sc, tmp, slice_state_off[3],
                            sp, y, 0, 3, bits, quant_table_idx[3], run_index);
        }
#endif
    }
}
|
|
|
|
/*
 * Terminate the slice's bitstream and append its footer: the payload length
 * as three bytes (most significant first) and, when `ec` is non-zero, a zero
 * byte followed by a CRC stored least significant byte first. The final size
 * and the slice's offset within out_data are recorded in slice_results.
 *
 * sc        - slice context whose coder / bit writer is flushed
 * slice_idx - index of the slice; selects the slice_results entries
 */
void finalize_slice(inout SliceContext sc, const uint slice_idx)
{
#ifdef CACHED_SYMBOL_READER
    /* Only the first invocation of the workgroup writes the footer. */
    if (gl_LocalInvocationID.x != 0)
        return;
#endif

#ifdef GOLOMB
    uint32_t total = sc.hdr_len + flush_put_bits(sc.pb);
#else
    uint32_t total = rac_terminate(sc.c);
#endif

    u8buf dst = u8buf(sc.c.bytestream_start);

    /* Append slice length */
    const u8vec4 len_bytes = unpack8(total);
    dst[total + 0].v = len_bytes.z;
    dst[total + 1].v = len_bytes.y;
    dst[total + 2].v = len_bytes.x;
    total += 3;

    /* Calculate and write CRC */
    if (ec != 0) {
        /* The zero byte is part of the data the CRC is computed over. */
        dst[total].v = uint8_t(0);
        total++;

        uint32_t crc = crcref;
        for (int i = 0; i < total; i++) {
            const uint32_t lut_idx = (crc & 0xFF) ^ uint32_t(dst[i].v);
            crc = crc_ieee[lut_idx] ^ (crc >> 8);
        }

        if (crcref != 0x00000000)
            crc ^= 0x8CD88196;

        const u8vec4 crc_bytes = unpack8(crc);
        dst[total + 0].v = crc_bytes.x;
        dst[total + 1].v = crc_bytes.y;
        dst[total + 2].v = crc_bytes.z;
        dst[total + 3].v = crc_bytes.w;
        total += 4;
    }

    /* Entry 0: encoded size including the footer. Entry 1: offset in out_data. */
    slice_results[slice_idx*2 + 0] = total;
    slice_results[slice_idx*2 + 1] = uint64_t(dst) - uint64_t(out_data);
}
|
|
|
|
/* Shader entry point: each workgroup encodes and finalizes one slice. */
void main(void)
{
    /* Linear slice index: workgroup row times workgroups per row, plus column. */
    const uint row_base  = gl_WorkGroupID.y*gl_NumWorkGroups.x;
    const uint slice_idx = row_base + gl_WorkGroupID.x;

    encode_slice(slice_ctx[slice_idx], slice_idx);
    finalize_slice(slice_ctx[slice_idx], slice_idx);
}
|