VSE: Multi-threaded video proxy downscaling
When building proxies at lower than 100% resolution, the video frame downscaling step was single-threaded, as found via #127956. Make it use the same threaded sws_scale machinery that the usual video decoding/encoding uses. Video encoding/decoding was only using it for RGB<->YUV conversions, so source and destination sizes were always matching; here, however, it needs to support different source and destination sizes. Time taken to rebuild a 50% proxy for a 4K-resolution, 1440-frame (1-minute) video file, on a Ryzen 5950X (Win10/VS2022): - Blender 4.2: 20.1 sec, CPU usage 30-40%. - Blender 4.3 main: 13.1 sec (the ffmpeg build has been fixed to use SIMD), CPU usage still 30-40% though. - This PR: 8.3 sec, CPU usage ~95%. Pull Request: https://projects.blender.org/blender/blender/pulls/128054
This commit is contained in:
Committed by: Aras Pranckevicius
Parent commit: e44fc3df06
Commit: 64feb05089
@@ -36,13 +36,13 @@ enum {
|
||||
};
|
||||
|
||||
struct AVFrame;
|
||||
struct ImageFormatData;
|
||||
struct ImBuf;
|
||||
struct RenderData;
|
||||
struct ReportList;
|
||||
struct Scene;
|
||||
struct SwsContext;
|
||||
|
||||
struct ImBuf;
|
||||
|
||||
bool BKE_ffmpeg_start(void *context_v,
|
||||
const Scene *scene,
|
||||
RenderData *rd,
|
||||
@@ -79,8 +79,13 @@ void BKE_ffmpeg_exit();
|
||||
* to release it. Internally the contexts are coming from the context
|
||||
* pool/cache.
|
||||
*/
|
||||
SwsContext *BKE_ffmpeg_sws_get_context(
|
||||
int width, int height, int av_src_format, int av_dst_format, int sws_flags);
|
||||
SwsContext *BKE_ffmpeg_sws_get_context(int src_width,
|
||||
int src_height,
|
||||
int av_src_format,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int av_dst_format,
|
||||
int sws_flags);
|
||||
void BKE_ffmpeg_sws_release_context(SwsContext *ctx);
|
||||
|
||||
void BKE_ffmpeg_sws_scale_frame(SwsContext *ctx, AVFrame *dst, const AVFrame *src);
|
||||
|
||||
@@ -64,7 +64,8 @@ struct StampData;
|
||||
constexpr int64_t swscale_cache_max_entries = 32;
|
||||
|
||||
struct SwscaleContext {
|
||||
int width = 0, height = 0;
|
||||
int src_width = 0, src_height = 0;
|
||||
int dst_width = 0, dst_height = 0;
|
||||
AVPixelFormat src_format = AV_PIX_FMT_NONE, dst_format = AV_PIX_FMT_NONE;
|
||||
int flags = 0;
|
||||
|
||||
@@ -700,8 +701,13 @@ static const AVCodec *get_av1_encoder(
|
||||
return codec;
|
||||
}
|
||||
|
||||
static SwsContext *sws_create_context(
|
||||
int width, int height, int av_src_format, int av_dst_format, int sws_flags)
|
||||
static SwsContext *sws_create_context(int src_width,
|
||||
int src_height,
|
||||
int av_src_format,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int av_dst_format,
|
||||
int sws_flags)
|
||||
{
|
||||
# if defined(FFMPEG_SWSCALE_THREADING)
|
||||
/* sws_getContext does not allow passing flags that ask for multi-threaded
|
||||
@@ -710,11 +716,11 @@ static SwsContext *sws_create_context(
|
||||
if (c == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
av_opt_set_int(c, "srcw", width, 0);
|
||||
av_opt_set_int(c, "srch", height, 0);
|
||||
av_opt_set_int(c, "srcw", src_width, 0);
|
||||
av_opt_set_int(c, "srch", src_height, 0);
|
||||
av_opt_set_int(c, "src_format", av_src_format, 0);
|
||||
av_opt_set_int(c, "dstw", width, 0);
|
||||
av_opt_set_int(c, "dsth", height, 0);
|
||||
av_opt_set_int(c, "dstw", dst_width, 0);
|
||||
av_opt_set_int(c, "dsth", dst_height, 0);
|
||||
av_opt_set_int(c, "dst_format", av_dst_format, 0);
|
||||
av_opt_set_int(c, "sws_flags", sws_flags, 0);
|
||||
av_opt_set_int(c, "threads", BLI_system_thread_count(), 0);
|
||||
@@ -724,11 +730,11 @@ static SwsContext *sws_create_context(
|
||||
return nullptr;
|
||||
}
|
||||
# else
|
||||
SwsContext *c = sws_getContext(width,
|
||||
height,
|
||||
SwsContext *c = sws_getContext(src_width,
|
||||
src_height,
|
||||
AVPixelFormat(av_src_format),
|
||||
width,
|
||||
height,
|
||||
dst_width,
|
||||
dst_height,
|
||||
AVPixelFormat(av_dst_format),
|
||||
sws_flags,
|
||||
nullptr,
|
||||
@@ -783,8 +789,13 @@ static void maintain_swscale_cache_size()
|
||||
}
|
||||
}
|
||||
|
||||
SwsContext *BKE_ffmpeg_sws_get_context(
|
||||
int width, int height, int av_src_format, int av_dst_format, int sws_flags)
|
||||
SwsContext *BKE_ffmpeg_sws_get_context(int src_width,
|
||||
int src_height,
|
||||
int av_src_format,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int av_dst_format,
|
||||
int sws_flags)
|
||||
{
|
||||
BLI_mutex_lock(&swscale_cache_lock);
|
||||
|
||||
@@ -795,7 +806,8 @@ SwsContext *BKE_ffmpeg_sws_get_context(
|
||||
/* Search for unused context that has suitable parameters. */
|
||||
SwsContext *ctx = nullptr;
|
||||
for (SwscaleContext &c : *swscale_cache) {
|
||||
if (!c.is_used && c.width == width && c.height == height && c.src_format == av_src_format &&
|
||||
if (!c.is_used && c.src_width == src_width && c.src_height == src_height &&
|
||||
c.src_format == av_src_format && c.dst_width == dst_width && c.dst_height == dst_height &&
|
||||
c.dst_format == av_dst_format && c.flags == sws_flags)
|
||||
{
|
||||
ctx = c.context;
|
||||
@@ -807,10 +819,13 @@ SwsContext *BKE_ffmpeg_sws_get_context(
|
||||
}
|
||||
if (ctx == nullptr) {
|
||||
/* No free matching context in cache: create a new one. */
|
||||
ctx = sws_create_context(width, height, av_src_format, av_dst_format, sws_flags);
|
||||
ctx = sws_create_context(
|
||||
src_width, src_height, av_src_format, dst_width, dst_height, av_dst_format, sws_flags);
|
||||
SwscaleContext c;
|
||||
c.width = width;
|
||||
c.height = height;
|
||||
c.src_width = src_width;
|
||||
c.src_height = src_height;
|
||||
c.dst_width = dst_width;
|
||||
c.dst_height = dst_height;
|
||||
c.src_format = AVPixelFormat(av_src_format);
|
||||
c.dst_format = AVPixelFormat(av_dst_format);
|
||||
c.flags = sws_flags;
|
||||
@@ -1108,7 +1123,7 @@ static AVStream *alloc_video_stream(FFMpegContext *context,
|
||||
/* Output pixel format is different, allocate frame for conversion. */
|
||||
context->img_convert_frame = alloc_picture(AV_PIX_FMT_RGBA, c->width, c->height);
|
||||
context->img_convert_ctx = BKE_ffmpeg_sws_get_context(
|
||||
c->width, c->height, AV_PIX_FMT_RGBA, c->pix_fmt, SWS_BICUBIC);
|
||||
c->width, c->height, AV_PIX_FMT_RGBA, c->width, c->height, c->pix_fmt, SWS_BICUBIC);
|
||||
}
|
||||
|
||||
avcodec_parameters_from_context(st->codecpar, c);
|
||||
|
||||
@@ -418,6 +418,8 @@ static int startffmpeg(ImBufAnim *anim)
|
||||
anim->img_convert_ctx = BKE_ffmpeg_sws_get_context(anim->x,
|
||||
anim->y,
|
||||
anim->pCodecCtx->pix_fmt,
|
||||
anim->x,
|
||||
anim->y,
|
||||
AV_PIX_FMT_RGBA,
|
||||
SWS_BILINEAR | SWS_PRINT_INFO |
|
||||
SWS_FULL_CHR_H_INT);
|
||||
|
||||
@@ -27,6 +27,8 @@
|
||||
# include "BLI_winstuff.h"
|
||||
#endif
|
||||
|
||||
#include "BKE_writeffmpeg.hh"
|
||||
|
||||
#include "IMB_anim.hh"
|
||||
#include "IMB_imbuf.hh"
|
||||
#include "IMB_indexer.hh"
|
||||
@@ -35,6 +37,7 @@
|
||||
#ifdef WITH_FFMPEG
|
||||
extern "C" {
|
||||
# include "ffmpeg_compat.h"
|
||||
# include <libavutil/cpu.h>
|
||||
# include <libavutil/imgutils.h>
|
||||
}
|
||||
#endif
|
||||
@@ -595,32 +598,20 @@ static proxy_output_ctx *alloc_proxy_output_ffmpeg(
|
||||
if (st->codecpar->width != width || st->codecpar->height != height ||
|
||||
st->codecpar->format != rv->c->pix_fmt)
|
||||
{
|
||||
const size_t align = av_cpu_max_align();
|
||||
rv->frame = av_frame_alloc();
|
||||
|
||||
av_image_fill_arrays(rv->frame->data,
|
||||
rv->frame->linesize,
|
||||
static_cast<const uint8_t *>(MEM_mallocN(
|
||||
av_image_get_buffer_size(rv->c->pix_fmt, width, height, 1),
|
||||
"alloc proxy output frame")),
|
||||
rv->c->pix_fmt,
|
||||
width,
|
||||
height,
|
||||
1);
|
||||
|
||||
rv->frame->format = rv->c->pix_fmt;
|
||||
rv->frame->width = width;
|
||||
rv->frame->height = height;
|
||||
av_frame_get_buffer(rv->frame, align);
|
||||
|
||||
rv->sws_ctx = sws_getContext(st->codecpar->width,
|
||||
rv->orig_height,
|
||||
AVPixelFormat(st->codecpar->format),
|
||||
width,
|
||||
height,
|
||||
rv->c->pix_fmt,
|
||||
SWS_FAST_BILINEAR | SWS_PRINT_INFO,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr);
|
||||
rv->sws_ctx = BKE_ffmpeg_sws_get_context(st->codecpar->width,
|
||||
rv->orig_height,
|
||||
AVPixelFormat(st->codecpar->format),
|
||||
width,
|
||||
height,
|
||||
rv->c->pix_fmt,
|
||||
SWS_FAST_BILINEAR);
|
||||
}
|
||||
|
||||
ret = avformat_write_header(rv->of, nullptr);
|
||||
@@ -655,13 +646,7 @@ static void add_to_proxy_output_ffmpeg(proxy_output_ctx *ctx, AVFrame *frame)
|
||||
if (ctx->sws_ctx && frame &&
|
||||
(frame->data[0] || frame->data[1] || frame->data[2] || frame->data[3]))
|
||||
{
|
||||
sws_scale(ctx->sws_ctx,
|
||||
(const uint8_t *const *)frame->data,
|
||||
frame->linesize,
|
||||
0,
|
||||
ctx->orig_height,
|
||||
ctx->frame->data,
|
||||
ctx->frame->linesize);
|
||||
BKE_ffmpeg_sws_scale_frame(ctx->sws_ctx, ctx->frame, frame);
|
||||
}
|
||||
|
||||
frame = ctx->sws_ctx ? (frame ? ctx->frame : nullptr) : frame;
|
||||
@@ -752,10 +737,11 @@ static void free_proxy_output_ffmpeg(proxy_output_ctx *ctx, int rollback)
|
||||
avformat_free_context(ctx->of);
|
||||
|
||||
if (ctx->sws_ctx) {
|
||||
sws_freeContext(ctx->sws_ctx);
|
||||
|
||||
MEM_freeN(ctx->frame->data[0]);
|
||||
av_free(ctx->frame);
|
||||
BKE_ffmpeg_sws_release_context(ctx->sws_ctx);
|
||||
ctx->sws_ctx = nullptr;
|
||||
}
|
||||
if (ctx->frame) {
|
||||
av_frame_free(&ctx->frame);
|
||||
}
|
||||
|
||||
get_proxy_filepath(ctx->anim, ctx->proxy_size, filepath_tmp, true);
|
||||
|
||||
Reference in New Issue
Block a user