ffmpeg: optimize ffmpeg_postprocess
After doing regular movie frame decoding, there's a "postprocess" step for
each incoming frame, that does deinterlacing if needed, then YUV->RGB
conversion, then vertical image flip and additional interlace filtering if
needed. While this postprocess step is not the "heavy" part of movie
playback, it still takes 2-3ms for each 1080p-resolution input frame that
is being played.
This PR does two things:
- Similar to #116008, uses multi-threaded `sws_scale` to do YUV->RGB
conversion.
- Reintroduces "do vertical flip while converting to RGB", where possible.
That was removed in 2ed73fc97e due to issues on the arm64 platform, and
the theory that negative strides passed to sws_scale are not an officially
supported usage.
My take on the last point: negative strides to sws_scale are a fine and
supported usage; ffmpeg just had a bug specifically on arm64 where they
were accidentally not respected. They fixed that for ffmpeg 6.0, and
backported the fix to all versions back to 3.4.13 -- you would not backport
something to 10 releases unless it was an actual bug fix!
I have tested the glitch_480p.mp4 that was originally attached to the
bug report #94237 back then, and it works fine both on x64 (Windows)
and arm64 (Mac).
Timings, ffmpeg_postprocess cost for a single 1920x1080 resolution movie
strip inside VSE:
- Windows/VS2022 Ryzen 5950X: 3.04ms -> 1.18ms
- Mac/clang15 M1 Max: 1.10ms -> 0.71ms
Pull Request: https://projects.blender.org/blender/blender/pulls/116309
This commit is contained in:
committed by
Aras Pranckevicius
parent
0e3dbcfae0
commit
4ef5d9f60f
@@ -39,9 +39,11 @@ enum {
|
|||||||
FFMPEG_PRESET_AV1 = 8,
|
FFMPEG_PRESET_AV1 = 8,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct AVFrame;
|
||||||
struct RenderData;
|
struct RenderData;
|
||||||
struct ReportList;
|
struct ReportList;
|
||||||
struct Scene;
|
struct Scene;
|
||||||
|
struct SwsContext;
|
||||||
|
|
||||||
int BKE_ffmpeg_start(void *context_v,
|
int BKE_ffmpeg_start(void *context_v,
|
||||||
const Scene *scene,
|
const Scene *scene,
|
||||||
@@ -73,4 +75,8 @@ bool BKE_ffmpeg_alpha_channel_is_supported(const RenderData *rd);
|
|||||||
void *BKE_ffmpeg_context_create(void);
|
void *BKE_ffmpeg_context_create(void);
|
||||||
void BKE_ffmpeg_context_free(void *context_v);
|
void BKE_ffmpeg_context_free(void *context_v);
|
||||||
|
|
||||||
|
SwsContext *BKE_ffmpeg_sws_get_context(
|
||||||
|
int width, int height, int av_src_format, int av_dst_format, int sws_flags);
|
||||||
|
void BKE_ffmpeg_sws_scale_frame(SwsContext *ctx, AVFrame *dst, const AVFrame *src);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -420,17 +420,7 @@ static AVFrame *generate_video_frame(FFMpegContext *context, const uint8_t *pixe
|
|||||||
/* Convert to the output pixel format, if it's different that Blender's internal one. */
|
/* Convert to the output pixel format, if it's different that Blender's internal one. */
|
||||||
if (context->img_convert_frame != nullptr) {
|
if (context->img_convert_frame != nullptr) {
|
||||||
BLI_assert(context->img_convert_ctx != NULL);
|
BLI_assert(context->img_convert_ctx != NULL);
|
||||||
# if defined(FFMPEG_SWSCALE_THREADING)
|
BKE_ffmpeg_sws_scale_frame(context->img_convert_ctx, context->current_frame, rgb_frame);
|
||||||
sws_scale_frame(context->img_convert_ctx, context->current_frame, rgb_frame);
|
|
||||||
# else
|
|
||||||
sws_scale(context->img_convert_ctx,
|
|
||||||
(const uint8_t *const *)rgb_frame->data,
|
|
||||||
rgb_frame->linesize,
|
|
||||||
0,
|
|
||||||
codec->height,
|
|
||||||
context->current_frame->data,
|
|
||||||
context->current_frame->linesize);
|
|
||||||
# endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return context->current_frame;
|
return context->current_frame;
|
||||||
@@ -677,10 +667,8 @@ static const AVCodec *get_av1_encoder(
|
|||||||
return codec;
|
return codec;
|
||||||
}
|
}
|
||||||
|
|
||||||
static SwsContext *get_threaded_sws_context(int width,
|
SwsContext *BKE_ffmpeg_sws_get_context(
|
||||||
int height,
|
int width, int height, int av_src_format, int av_dst_format, int sws_flags)
|
||||||
AVPixelFormat src_format,
|
|
||||||
AVPixelFormat dst_format)
|
|
||||||
{
|
{
|
||||||
# if defined(FFMPEG_SWSCALE_THREADING)
|
# if defined(FFMPEG_SWSCALE_THREADING)
|
||||||
/* sws_getContext does not allow passing flags that ask for multi-threaded
|
/* sws_getContext does not allow passing flags that ask for multi-threaded
|
||||||
@@ -691,11 +679,11 @@ static SwsContext *get_threaded_sws_context(int width,
|
|||||||
}
|
}
|
||||||
av_opt_set_int(c, "srcw", width, 0);
|
av_opt_set_int(c, "srcw", width, 0);
|
||||||
av_opt_set_int(c, "srch", height, 0);
|
av_opt_set_int(c, "srch", height, 0);
|
||||||
av_opt_set_int(c, "src_format", src_format, 0);
|
av_opt_set_int(c, "src_format", av_src_format, 0);
|
||||||
av_opt_set_int(c, "dstw", width, 0);
|
av_opt_set_int(c, "dstw", width, 0);
|
||||||
av_opt_set_int(c, "dsth", height, 0);
|
av_opt_set_int(c, "dsth", height, 0);
|
||||||
av_opt_set_int(c, "dst_format", dst_format, 0);
|
av_opt_set_int(c, "dst_format", av_dst_format, 0);
|
||||||
av_opt_set_int(c, "sws_flags", SWS_BICUBIC, 0);
|
av_opt_set_int(c, "sws_flags", sws_flags, 0);
|
||||||
av_opt_set_int(c, "threads", BLI_system_thread_count(), 0);
|
av_opt_set_int(c, "threads", BLI_system_thread_count(), 0);
|
||||||
|
|
||||||
if (sws_init_context(c, nullptr, nullptr) < 0) {
|
if (sws_init_context(c, nullptr, nullptr) < 0) {
|
||||||
@@ -705,11 +693,11 @@ static SwsContext *get_threaded_sws_context(int width,
|
|||||||
# else
|
# else
|
||||||
SwsContext *c = sws_getContext(width,
|
SwsContext *c = sws_getContext(width,
|
||||||
height,
|
height,
|
||||||
src_format,
|
AVPixelFormat(av_src_format),
|
||||||
width,
|
width,
|
||||||
height,
|
height,
|
||||||
dst_format,
|
AVPixelFormat(av_dst_format),
|
||||||
SWS_BICUBIC,
|
sws_flags,
|
||||||
nullptr,
|
nullptr,
|
||||||
nullptr,
|
nullptr,
|
||||||
nullptr);
|
nullptr);
|
||||||
@@ -717,6 +705,14 @@ static SwsContext *get_threaded_sws_context(int width,
|
|||||||
|
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
void BKE_ffmpeg_sws_scale_frame(SwsContext *ctx, AVFrame *dst, const AVFrame *src)
|
||||||
|
{
|
||||||
|
# if defined(FFMPEG_SWSCALE_THREADING)
|
||||||
|
sws_scale_frame(ctx, dst, src);
|
||||||
|
# else
|
||||||
|
sws_scale(ctx, src->data, src->linesize, 0, src->height, dst->data, dst->linesize);
|
||||||
|
# endif
|
||||||
|
}
|
||||||
|
|
||||||
/* prepare a video stream for the output file */
|
/* prepare a video stream for the output file */
|
||||||
|
|
||||||
@@ -955,8 +951,8 @@ static AVStream *alloc_video_stream(FFMpegContext *context,
|
|||||||
else {
|
else {
|
||||||
/* Output pixel format is different, allocate frame for conversion. */
|
/* Output pixel format is different, allocate frame for conversion. */
|
||||||
context->img_convert_frame = alloc_picture(AV_PIX_FMT_RGBA, c->width, c->height);
|
context->img_convert_frame = alloc_picture(AV_PIX_FMT_RGBA, c->width, c->height);
|
||||||
context->img_convert_ctx = get_threaded_sws_context(
|
context->img_convert_ctx = BKE_ffmpeg_sws_get_context(
|
||||||
c->width, c->height, AV_PIX_FMT_RGBA, c->pix_fmt);
|
c->width, c->height, AV_PIX_FMT_RGBA, c->pix_fmt, SWS_BICUBIC);
|
||||||
}
|
}
|
||||||
|
|
||||||
avcodec_parameters_from_context(st->codecpar, c);
|
avcodec_parameters_from_context(st->codecpar, c);
|
||||||
|
|||||||
@@ -63,6 +63,7 @@
|
|||||||
|
|
||||||
#ifdef WITH_FFMPEG
|
#ifdef WITH_FFMPEG
|
||||||
# include "BKE_global.h" /* ENDIAN_ORDER */
|
# include "BKE_global.h" /* ENDIAN_ORDER */
|
||||||
|
# include "BKE_writeffmpeg.hh"
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
# include <libavcodec/avcodec.h>
|
# include <libavcodec/avcodec.h>
|
||||||
@@ -694,16 +695,12 @@ static int startffmpeg(anim *anim)
|
|||||||
1);
|
1);
|
||||||
}
|
}
|
||||||
|
|
||||||
anim->img_convert_ctx = sws_getContext(anim->x,
|
anim->img_convert_ctx = BKE_ffmpeg_sws_get_context(anim->x,
|
||||||
anim->y,
|
anim->y,
|
||||||
anim->pCodecCtx->pix_fmt,
|
anim->pCodecCtx->pix_fmt,
|
||||||
anim->x,
|
AV_PIX_FMT_RGBA,
|
||||||
anim->y,
|
SWS_BILINEAR | SWS_PRINT_INFO |
|
||||||
AV_PIX_FMT_RGBA,
|
SWS_FULL_CHR_H_INT);
|
||||||
SWS_BILINEAR | SWS_PRINT_INFO | SWS_FULL_CHR_H_INT,
|
|
||||||
nullptr,
|
|
||||||
nullptr,
|
|
||||||
nullptr);
|
|
||||||
|
|
||||||
if (!anim->img_convert_ctx) {
|
if (!anim->img_convert_ctx) {
|
||||||
fprintf(stderr, "Can't transform color space??? Bailing out...\n");
|
fprintf(stderr, "Can't transform color space??? Bailing out...\n");
|
||||||
@@ -846,32 +843,48 @@ static void ffmpeg_postprocess(anim *anim, AVFrame *input, ImBuf *ibuf)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sws_scale(anim->img_convert_ctx,
|
/* If final destination image layout matches that of decoded RGB frame (including
|
||||||
(const uint8_t *const *)input->data,
|
* any line padding done by ffmpeg for SIMD alignment), we can directly
|
||||||
input->linesize,
|
* decode into that, doing the vertical flip in the same step. Otherwise have
|
||||||
0,
|
* to do a separate flip. */
|
||||||
anim->y,
|
const int ibuf_linesize = ibuf->x * 4;
|
||||||
anim->pFrameRGB->data,
|
const int rgb_linesize = anim->pFrameRGB->linesize[0];
|
||||||
anim->pFrameRGB->linesize);
|
bool scale_to_ibuf = (rgb_linesize == ibuf_linesize);
|
||||||
|
/* swscale on arm64 before ffmpeg 6.0 (libswscale major version 7)
|
||||||
|
* could not handle negative line sizes. That has been fixed in all major
|
||||||
|
* ffmpeg releases in early 2023, but easier to just check for "below 7". */
|
||||||
|
# if (defined(__aarch64__) || defined(_M_ARM64)) && (LIBSWSCALE_VERSION_MAJOR < 7)
|
||||||
|
scale_to_ibuf = false;
|
||||||
|
# endif
|
||||||
|
uint8_t *rgb_data = anim->pFrameRGB->data[0];
|
||||||
|
|
||||||
|
if (scale_to_ibuf) {
|
||||||
|
/* Decode RGB and do vertical flip directly into destination image, by using negative
|
||||||
|
* line size. */
|
||||||
|
anim->pFrameRGB->linesize[0] = -ibuf_linesize;
|
||||||
|
anim->pFrameRGB->data[0] = ibuf->byte_buffer.data + (ibuf->y - 1) * ibuf_linesize;
|
||||||
|
|
||||||
|
BKE_ffmpeg_sws_scale_frame(anim->img_convert_ctx, anim->pFrameRGB, input);
|
||||||
|
|
||||||
|
anim->pFrameRGB->linesize[0] = rgb_linesize;
|
||||||
|
anim->pFrameRGB->data[0] = rgb_data;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* Decode, then do vertical flip into destination. */
|
||||||
|
BKE_ffmpeg_sws_scale_frame(anim->img_convert_ctx, anim->pFrameRGB, input);
|
||||||
|
|
||||||
|
/* Use negative line size to do vertical image flip. */
|
||||||
|
const int src_linesize[4] = {-rgb_linesize, 0, 0, 0};
|
||||||
|
const uint8_t *const src[4] = {
|
||||||
|
rgb_data + (anim->y - 1) * rgb_linesize, nullptr, nullptr, nullptr};
|
||||||
|
int dst_size = av_image_get_buffer_size(AVPixelFormat(anim->pFrameRGB->format),
|
||||||
|
anim->pFrameRGB->width,
|
||||||
|
anim->pFrameRGB->height,
|
||||||
|
1);
|
||||||
|
av_image_copy_to_buffer(
|
||||||
|
ibuf->byte_buffer.data, dst_size, src, src_linesize, AV_PIX_FMT_RGBA, anim->x, anim->y, 1);
|
||||||
|
}
|
||||||
|
|
||||||
/* Copy the valid bytes from the aligned buffer vertically flipped into ImBuf */
|
|
||||||
int aligned_stride = anim->pFrameRGB->linesize[0];
|
|
||||||
const uint8_t *const src[4] = {
|
|
||||||
anim->pFrameRGB->data[0] + (anim->y - 1) * aligned_stride, nullptr, nullptr, nullptr};
|
|
||||||
/* NOTE: Negative linesize is used to copy and flip image at once with function
|
|
||||||
* `av_image_copy_to_buffer`. This could cause issues in future and image may need to be flipped
|
|
||||||
* explicitly. */
|
|
||||||
const int src_linesize[4] = {-anim->pFrameRGB->linesize[0], 0, 0, 0};
|
|
||||||
int dst_size = av_image_get_buffer_size(
|
|
||||||
AVPixelFormat(anim->pFrameRGB->format), anim->pFrameRGB->width, anim->pFrameRGB->height, 1);
|
|
||||||
av_image_copy_to_buffer((uint8_t *)ibuf->byte_buffer.data,
|
|
||||||
dst_size,
|
|
||||||
src,
|
|
||||||
src_linesize,
|
|
||||||
AV_PIX_FMT_RGBA,
|
|
||||||
anim->x,
|
|
||||||
anim->y,
|
|
||||||
1);
|
|
||||||
if (filter_y) {
|
if (filter_y) {
|
||||||
IMB_filtery(ibuf);
|
IMB_filtery(ibuf);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user