ffmpeg: 10 and 12 bit video support

Part of the overall #118493 task: video input/output support for 10 and 12 bit/component formats. (Note: these are still LDR videos, just at higher precision; there is no HDR handling yet.)

Movie reading/playback: when the movie file pixel format has >8 bit components, decode those into a floating point ImBuf result. Previously all movies were decoded into 8 bit/channel ImBufs, so 10- and 12-bit movie pixel colors were getting quantized.

Movie output: when an ffmpeg video with a suitable codec is selected, there is a color depth setting under the Encoding block. Currently that is:
- 10 bit option for H.264, H.265, AV1 (VP9 could do 10 bit in theory too, but the ffmpeg that is built into Blender does not have that compiled in)
- 12 bit option for H.265, AV1

When "lossless" is picked, then, similar to how regular 8-bit video switches from YUV 4:2:0 to 4:4:4, this also switches to the 4:4:4 10- or 12-bit variant.

Pull Request: https://projects.blender.org/blender/blender/pulls/129298
2024-11-05 16:44:16 +01:00
parent f4f94fb055
commit 39c4c7cf3f
9 changed files with 287 additions and 107 deletions
--- a/scripts/startup/bl_ui/properties_output.py
+++ b/scripts/startup/bl_ui/properties_output.py
@@ -445,6 +445,13 @@ class RENDER_PT_encoding_video(RenderOutputButtonsPanel, Panel):
        if needs_codec and ffmpeg.codec == 'NONE':
            return

+        # Color depth. List of codecs needs to be in sync with
+        # `BKE_ffmpeg_valid_bit_depths` in source code.
+        use_bpp = needs_codec and ffmpeg.codec in {'H264', 'H265', 'AV1'}
+        if use_bpp:
+            image_settings = context.scene.render.image_settings
+            layout.prop(image_settings, "color_depth", expand=True)
+
        if ffmpeg.codec == 'DNXHD':
            layout.prop(ffmpeg, "use_lossless_output")

--- a/source/blender/blenkernel/BKE_image_format.h
+++ b/source/blender/blenkernel/BKE_image_format.h
@@ -14,6 +14,7 @@ extern "C" {

 struct BlendDataReader;
 struct BlendWriter;
+struct ID;
 struct ImbFormatOptions;
 struct ImageFormatData;
 struct ImBuf;
@@ -88,6 +89,7 @@ bool BKE_imtype_supports_quality(char imtype);
 bool BKE_imtype_requires_linear_float(char imtype);
 char BKE_imtype_valid_channels(char imtype, bool write_file);
 char BKE_imtype_valid_depths(char imtype);
+char BKE_imtype_valid_depths_with_video(char imtype, const ID *owner_id);

 /**
 * String is from command line `--render-format` argument,
--- a/source/blender/blenkernel/BKE_writeffmpeg.hh
+++ b/source/blender/blenkernel/BKE_writeffmpeg.hh
@@ -68,6 +68,11 @@ void BKE_ffmpeg_preset_set(RenderData *rd, int preset);
 void BKE_ffmpeg_image_type_verify(RenderData *rd, const ImageFormatData *imf);
 bool BKE_ffmpeg_alpha_channel_is_supported(const RenderData *rd);
 bool BKE_ffmpeg_codec_supports_crf(int av_codec_id);
+/**
+ * Which pixel bit depths are supported by a given video codec.
+ * Returns bitmask of `R_IMF_CHAN_DEPTH_` flags.
+ */
+int BKE_ffmpeg_valid_bit_depths(int av_codec_id);

 void *BKE_ffmpeg_context_create();
 void BKE_ffmpeg_context_free(void *context_v);
--- a/source/blender/blenkernel/intern/image_format.cc
+++ b/source/blender/blenkernel/intern/image_format.cc
@@ -21,6 +21,10 @@
 #include "BKE_colortools.hh"
 #include "BKE_image_format.h"

+#ifdef WITH_FFMPEG
+#  include "BKE_writeffmpeg.hh"
+#endif
+
 /* Init/Copy/Free */

 void BKE_image_format_init(ImageFormatData *imf, const bool render)
@@ -320,6 +324,22 @@ char BKE_imtype_valid_depths(const char imtype)
  }
 }

+char BKE_imtype_valid_depths_with_video(char imtype, const ID *owner_id)
+{
+  int depths = BKE_imtype_valid_depths(imtype);
+#ifdef WITH_FFMPEG
+  /* Depending on video codec selected, valid color bit depths might vary. */
+  if (imtype == R_IMF_IMTYPE_FFMPEG) {
+    const bool is_render_out = (owner_id && GS(owner_id->name) == ID_SCE);
+    if (is_render_out) {
+      const Scene *scene = (const Scene *)owner_id;
+      depths |= BKE_ffmpeg_valid_bit_depths(scene->r.ffcodecdata.codec);
+    }
+  }
+#endif
+  return depths;
+}
+
 char BKE_imtype_from_arg(const char *imtype_arg)
 {
  if (STREQ(imtype_arg, "TGA")) {
--- a/source/blender/blenkernel/intern/writeffmpeg.cc
+++ b/source/blender/blenkernel/intern/writeffmpeg.cc
@@ -398,9 +398,12 @@ static bool write_video_frame(FFMpegContext *context, AVFrame *frame, ReportList
 /* read and encode a frame of video from the buffer */
 static AVFrame *generate_video_frame(FFMpegContext *context, const ImBuf *image)
 {
-  /* For now only 8-bit/channel images are supported. */
  const uint8_t *pixels = image->byte_buffer.data;
-  if (pixels == nullptr) {
+  const float *pixels_fl = image->float_buffer.data;
+  /* Use float input if needed. */
+  const bool use_float = context->img_convert_frame != nullptr &&
+                         context->img_convert_frame->format != AV_PIX_FMT_RGBA;
+  if ((!use_float && (pixels == nullptr)) || (use_float && (pixels_fl == nullptr))) {
    return nullptr;
  }

@@ -421,31 +424,57 @@ static AVFrame *generate_video_frame(FFMpegContext *context, const ImBuf *image)
   * shared (i.e. not writable). */
  av_frame_make_writable(rgb_frame);

-  /* Copy the Blender pixels into the FFMPEG data-structure, taking care of endianness and flipping
-   * the image vertically. */
-  int linesize = rgb_frame->linesize[0];
-  int linesize_src = rgb_frame->width * 4;
-  for (int y = 0; y < height; y++) {
-    uint8_t *target = rgb_frame->data[0] + linesize * (height - y - 1);
-    const uint8_t *src = pixels + linesize_src * y;
+  const size_t linesize_dst = rgb_frame->linesize[0];
+  if (use_float) {
+    /* Float image: need to split up the image into a planar format,
+     * because libswscale does not support RGBA->YUV conversions from
+     * packed float formats. */
+    BLI_assert_msg(rgb_frame->linesize[1] == linesize_dst &&
+                       rgb_frame->linesize[2] == linesize_dst &&
+                       rgb_frame->linesize[3] == linesize_dst,
+                   "ffmpeg frame should be 4 same size planes for a floating point image case");
+    for (int y = 0; y < height; y++) {
+      size_t dst_offset = linesize_dst * (height - y - 1);
+      float *dst_g = reinterpret_cast<float *>(rgb_frame->data[0] + dst_offset);
+      float *dst_b = reinterpret_cast<float *>(rgb_frame->data[1] + dst_offset);
+      float *dst_r = reinterpret_cast<float *>(rgb_frame->data[2] + dst_offset);
+      float *dst_a = reinterpret_cast<float *>(rgb_frame->data[3] + dst_offset);
+      const float *src = pixels_fl + image->x * y * 4;
+      for (int x = 0; x < image->x; x++) {
+        *dst_r++ = src[0];
+        *dst_g++ = src[1];
+        *dst_b++ = src[2];
+        *dst_a++ = src[3];
+        src += 4;
+      }
+    }
+  }
+  else {
+    /* Byte image: flip the image vertically, possibly with endian
+     * conversion. */
+    const size_t linesize_src = rgb_frame->width * 4;
+    for (int y = 0; y < height; y++) {
+      uint8_t *target = rgb_frame->data[0] + linesize_dst * (height - y - 1);
+      const uint8_t *src = pixels + linesize_src * y;

 #  if ENDIAN_ORDER == L_ENDIAN
-    memcpy(target, src, linesize_src);
+      memcpy(target, src, linesize_src);

 #  elif ENDIAN_ORDER == B_ENDIAN
-    const uint8_t *end = src + linesize_src;
-    while (src != end) {
-      target[3] = src[0];
-      target[2] = src[1];
-      target[1] = src[2];
-      target[0] = src[3];
+      const uint8_t *end = src + linesize_src;
+      while (src != end) {
+        target[3] = src[0];
+        target[2] = src[1];
+        target[1] = src[2];
+        target[0] = src[3];

-      target += 4;
-      src += 4;
-    }
+        target += 4;
+        src += 4;
+      }
 #  else
 #    error ENDIAN_ORDER should either be L_ENDIAN or B_ENDIAN.
 #  endif
+    }
  }

  /* Convert to the output pixel format, if it's different that Blender's internal one. */
@@ -868,25 +897,36 @@ void BKE_ffmpeg_sws_scale_frame(SwsContext *ctx, AVFrame *dst, const AVFrame *sr

 /* Remap H.264 CRF to H.265 CRF: 17..32 range (23 default) to 20..37 range (28 default).
 * https://trac.ffmpeg.org/wiki/Encode/H.265 */
-static int remap_crf_to_h265_crf(int crf)
+static int remap_crf_to_h265_crf(int crf, bool is_10_or_12_bpp)
 {
+  /* 10/12 bit videos seem to need slightly lower CRF value for similar quality. */
+  const int bias = is_10_or_12_bpp ? -3 : 0;
  switch (crf) {
    case FFM_CRF_PERC_LOSSLESS:
-      return 20;
+      return 20 + bias;
    case FFM_CRF_HIGH:
-      return 24;
+      return 24 + bias;
    case FFM_CRF_MEDIUM:
-      return 28;
+      return 28 + bias;
    case FFM_CRF_LOW:
-      return 31;
+      return 31 + bias;
    case FFM_CRF_VERYLOW:
-      return 34;
+      return 34 + bias;
    case FFM_CRF_LOWEST:
-      return 37;
+      return 37 + bias;
  }
  return crf;
 }

+/* 10bpp H264: remap 0..51 range to -12..51 range
+ * https://trac.ffmpeg.org/wiki/Encode/H.264#a1.ChooseaCRFvalue */
+static int remap_crf_to_h264_10bpp_crf(int crf)
+{
+  crf = int(-12.0f + (crf / 51.0f) * 63.0f);
+  crf = max_ii(crf, 0);
+  return crf;
+}
+
 static void set_quality_rate_options(const FFMpegContext *context,
                                     const AVCodecID codec_id,
                                     const RenderData *rd,
@@ -907,6 +947,8 @@ static void set_quality_rate_options(const FFMpegContext *context,
   * https://trac.ffmpeg.org/wiki/Encode/VP9 */
  c->bit_rate = 0;

+  const bool is_10_bpp = rd->im_format.depth == R_IMF_CHAN_DEPTH_10;
+  const bool is_12_bpp = rd->im_format.depth == R_IMF_CHAN_DEPTH_12;
  const bool av1_librav1e = codec_id == AV_CODEC_ID_AV1 && STREQ(c->codec->name, "librav1e");
  const bool av1_libsvtav1 = codec_id == AV_CODEC_ID_AV1 && STREQ(c->codec->name, "libsvtav1");

@@ -916,6 +958,10 @@ static void set_quality_rate_options(const FFMpegContext *context,
      /* VP9 needs "lossless": https://trac.ffmpeg.org/wiki/Encode/VP9#LosslessVP9 */
      ffmpeg_dict_set_int(opts, "lossless", 1);
    }
+    else if (codec_id == AV_CODEC_ID_H264 && is_10_bpp) {
+      /* 10bpp H264 needs "qp": https://trac.ffmpeg.org/wiki/Encode/H.264#a1.ChooseaCRFvalue */
+      ffmpeg_dict_set_int(opts, "qp", 0);
+    }
    else if (codec_id == AV_CODEC_ID_H265) {
      /* H.265 needs "lossless" in private params; also make it much less verbose. */
      av_dict_set(opts, "x265-params", "log-level=1:lossless=1", 0);
@@ -934,8 +980,11 @@ static void set_quality_rate_options(const FFMpegContext *context,
  /* Handle CRF setting cases. */
  int crf = context->ffmpeg_crf;

-  if (codec_id == AV_CODEC_ID_H265) {
-    crf = remap_crf_to_h265_crf(crf);
+  if (codec_id == AV_CODEC_ID_H264 && is_10_bpp) {
+    crf = remap_crf_to_h264_10bpp_crf(crf);
+  }
+  else if (codec_id == AV_CODEC_ID_H265) {
+    crf = remap_crf_to_h265_crf(crf, is_10_bpp || is_12_bpp);
    /* Make H.265 much less verbose. */
    av_dict_set(opts, "x265-params", "log-level=1", 0);
  }
@@ -1080,6 +1129,15 @@ static AVStream *alloc_video_stream(FFMpegContext *context,
    c->pix_fmt = AV_PIX_FMT_YUV422P;
  }

+  const bool is_10_bpp = rd->im_format.depth == R_IMF_CHAN_DEPTH_10;
+  const bool is_12_bpp = rd->im_format.depth == R_IMF_CHAN_DEPTH_12;
+  if (is_10_bpp) {
+    c->pix_fmt = AV_PIX_FMT_YUV420P10LE;
+  }
+  else if (is_12_bpp) {
+    c->pix_fmt = AV_PIX_FMT_YUV420P12LE;
+  }
+
  if (context->ffmpeg_type == FFMPEG_XVID) {
    /* Alas! */
    c->pix_fmt = AV_PIX_FMT_YUV420P;
@@ -1121,6 +1179,12 @@ static AVStream *alloc_video_stream(FFMpegContext *context,
  {
    /* Use 4:4:4 instead of 4:2:0 pixel format for lossless rendering. */
    c->pix_fmt = AV_PIX_FMT_YUV444P;
+    if (is_10_bpp) {
+      c->pix_fmt = AV_PIX_FMT_YUV444P10LE;
+    }
+    else if (is_12_bpp) {
+      c->pix_fmt = AV_PIX_FMT_YUV444P12LE;
+    }
  }

  if (codec_id == AV_CODEC_ID_PNG) {
@@ -1178,9 +1242,10 @@ static AVStream *alloc_video_stream(FFMpegContext *context,
  }
  else {
    /* Output pixel format is different, allocate frame for conversion. */
-    context->img_convert_frame = alloc_picture(AV_PIX_FMT_RGBA, c->width, c->height);
+    AVPixelFormat src_format = is_10_bpp || is_12_bpp ? AV_PIX_FMT_GBRAPF32LE : AV_PIX_FMT_RGBA;
+    context->img_convert_frame = alloc_picture(src_format, c->width, c->height);
    context->img_convert_ctx = BKE_ffmpeg_sws_get_context(
-        c->width, c->height, AV_PIX_FMT_RGBA, c->width, c->height, c->pix_fmt, SWS_BICUBIC);
+        c->width, c->height, src_format, c->width, c->height, c->pix_fmt, SWS_BICUBIC);
  }

  avcodec_parameters_from_context(st->codecpar, c);
@@ -2036,6 +2101,19 @@ bool BKE_ffmpeg_codec_supports_crf(int av_codec_id)
              AV_CODEC_ID_AV1);
 }

+int BKE_ffmpeg_valid_bit_depths(int av_codec_id)
+{
+  int bit_depths = R_IMF_CHAN_DEPTH_8;
+  /* Note: update properties_output.py `use_bpp` when changing this function. */
+  if (ELEM(av_codec_id, AV_CODEC_ID_H264, AV_CODEC_ID_H265, AV_CODEC_ID_AV1)) {
+    bit_depths |= R_IMF_CHAN_DEPTH_10;
+  }
+  if (ELEM(av_codec_id, AV_CODEC_ID_H265, AV_CODEC_ID_AV1)) {
+    bit_depths |= R_IMF_CHAN_DEPTH_12;
+  }
+  return bit_depths;
+}
+
 void *BKE_ffmpeg_context_create()
 {
  /* New FFMPEG data struct. */
--- a/source/blender/editors/space_image/image_buttons.cc
+++ b/source/blender/editors/space_image/image_buttons.cc
@@ -940,6 +940,9 @@ void uiTemplateImageSettings(uiLayout *layout, PointerRNA *imfptr, bool color_ma
 {
  ImageFormatData *imf = static_cast<ImageFormatData *>(imfptr->data);
  ID *id = imfptr->owner_id;
+  /* Note: this excludes any video formats; for them the image template does
+   * not show the color depth. Color depth instead is shown as part of encoding UI block,
+   * which is less confusing. */
  const int depth_ok = BKE_imtype_valid_depths(imf->imtype);
  /* some settings depend on this being a scene that's rendered */
  const bool is_render_out = (id && GS(id->name) == ID_SCE);
--- a/source/blender/imbuf/intern/IMB_anim.hh
+++ b/source/blender/imbuf/intern/IMB_anim.hh
@@ -59,6 +59,7 @@ struct ImBufAnim {
  AVPacket *cur_packet;

  bool seek_before_decode;
+  bool is_float;
 #endif

  char index_dir[768];
--- a/source/blender/imbuf/intern/anim_movie.cc
+++ b/source/blender/imbuf/intern/anim_movie.cc
@@ -234,6 +234,19 @@ static int ffmpeg_frame_count_get(AVFormatContext *pFormatCtx, AVStream *video_s
  return 0;
 }

+static int calc_pix_fmt_max_component_bits(AVPixelFormat fmt)
+{
+  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
+  if (desc == nullptr) {
+    return 0;
+  }
+  int bits = 0;
+  for (int i = 0; i < desc->nb_components; i++) {
+    bits = max_ii(bits, desc->comp[i].depth);
+  }
+  return bits;
+}
+
 static int startffmpeg(ImBufAnim *anim)
 {
  const AVCodec *pCodec;
@@ -343,6 +356,8 @@ static int startffmpeg(ImBufAnim *anim)

  anim->x = pCodecCtx->width;
  anim->y = pCodecCtx->height;
+  /* Decode >8bit videos into floating point image. */
+  anim->is_float = calc_pix_fmt_max_component_bits(pCodecCtx->pix_fmt) > 8;

  anim->pFormatCtx = pFormatCtx;
  anim->pCodecCtx = pCodecCtx;
@@ -361,7 +376,10 @@ static int startffmpeg(ImBufAnim *anim)
  anim->pFrame_complete = false;
  anim->pFrameDeinterlaced = av_frame_alloc();
  anim->pFrameRGB = av_frame_alloc();
-  anim->pFrameRGB->format = AV_PIX_FMT_RGBA;
+  /* Ideally we'd use AV_PIX_FMT_RGBAF32LE for floats, but currently (ffmpeg 6.1)
+   * swscale does not support that as destination. So using AV_PIX_FMT_GBRAPF32LE
+   * with manual interleaving to RGBA floats. */
+  anim->pFrameRGB->format = anim->is_float ? AV_PIX_FMT_GBRAPF32LE : AV_PIX_FMT_RGBA;
  anim->pFrameRGB->width = anim->x;
  anim->pFrameRGB->height = anim->y;

@@ -379,19 +397,6 @@ static int startffmpeg(ImBufAnim *anim)
    return -1;
  }

-  if (av_image_get_buffer_size(AV_PIX_FMT_RGBA, anim->x, anim->y, 1) != anim->x * anim->y * 4) {
-    fprintf(stderr, "ffmpeg has changed alloc scheme ... ARGHHH!\n");
-    avcodec_free_context(&anim->pCodecCtx);
-    avformat_close_input(&anim->pFormatCtx);
-    av_packet_free(&anim->cur_packet);
-    av_frame_free(&anim->pFrameRGB);
-    av_frame_free(&anim->pFrameDeinterlaced);
-    av_frame_free(&anim->pFrame);
-    av_frame_free(&anim->pFrame_backup);
-    anim->pCodecCtx = nullptr;
-    return -1;
-  }
-
  if (anim->ib_flags & IB_animdeinterlace) {
    anim->pFrameDeinterlaced->format = anim->pCodecCtx->pix_fmt;
    anim->pFrameDeinterlaced->width = anim->pCodecCtx->width;
@@ -414,12 +419,16 @@ static int startffmpeg(ImBufAnim *anim)
                                                     anim->pCodecCtx->pix_fmt,
                                                     anim->x,
                                                     anim->y,
-                                                     AV_PIX_FMT_RGBA,
+                                                     anim->pFrameRGB->format,
                                                     SWS_BILINEAR | SWS_PRINT_INFO |
                                                         SWS_FULL_CHR_H_INT);

  if (!anim->img_convert_ctx) {
-    fprintf(stderr, "Can't transform color space??? Bailing out...\n");
+    fprintf(stderr,
+            "ffmpeg: swscale can't transform from pixel format %s to %s (%s)\n",
+            av_get_pix_fmt_name(anim->pCodecCtx->pix_fmt),
+            av_get_pix_fmt_name((AVPixelFormat)anim->pFrameRGB->format),
+            anim->filepath);
    avcodec_free_context(&anim->pCodecCtx);
    avformat_close_input(&anim->pFormatCtx);
    av_packet_free(&anim->cur_packet);
@@ -559,46 +568,81 @@ static void ffmpeg_postprocess(ImBufAnim *anim, AVFrame *input, ImBuf *ibuf)
    }
  }

-  /* If final destination image layout matches that of decoded RGB frame (including
-   * any line padding done by ffmpeg for SIMD alignment), we can directly
-   * decode into that, doing the vertical flip in the same step. Otherwise have
-   * to do a separate flip. */
-  const int ibuf_linesize = ibuf->x * 4;
-  const int rgb_linesize = anim->pFrameRGB->linesize[0];
-  bool scale_to_ibuf = (rgb_linesize == ibuf_linesize);
-  /* swscale on arm64 before ffmpeg 6.0 (libswscale major version 7)
-   * could not handle negative line sizes. That has been fixed in all major
-   * ffmpeg releases in early 2023, but easier to just check for "below 7". */
-#  if (defined(__aarch64__) || defined(_M_ARM64)) && (LIBSWSCALE_VERSION_MAJOR < 7)
-  scale_to_ibuf = false;
-#  endif
-  uint8_t *rgb_data = anim->pFrameRGB->data[0];
-
-  if (scale_to_ibuf) {
-    /* Decode RGB and do vertical flip directly into destination image, by using negative
-     * line size. */
-    anim->pFrameRGB->linesize[0] = -ibuf_linesize;
-    anim->pFrameRGB->data[0] = ibuf->byte_buffer.data + (ibuf->y - 1) * ibuf_linesize;
-
-    BKE_ffmpeg_sws_scale_frame(anim->img_convert_ctx, anim->pFrameRGB, input);
-
-    anim->pFrameRGB->linesize[0] = rgb_linesize;
-    anim->pFrameRGB->data[0] = rgb_data;
-  }
-  else {
+  if (anim->is_float) {
+    /* Float images are converted into planar GBRA layout by swscale (since
+     * it does not support direct YUV->RGBA float interleaved conversion).
+     * Do vertical flip and interleave into RGBA manually. */
    /* Decode, then do vertical flip into destination. */
    BKE_ffmpeg_sws_scale_frame(anim->img_convert_ctx, anim->pFrameRGB, input);

-    /* Use negative line size to do vertical image flip. */
-    const int src_linesize[4] = {-rgb_linesize, 0, 0, 0};
-    const uint8_t *const src[4] = {
-        rgb_data + (anim->y - 1) * rgb_linesize, nullptr, nullptr, nullptr};
-    int dst_size = av_image_get_buffer_size(AVPixelFormat(anim->pFrameRGB->format),
-                                            anim->pFrameRGB->width,
-                                            anim->pFrameRGB->height,
-                                            1);
-    av_image_copy_to_buffer(
-        ibuf->byte_buffer.data, dst_size, src, src_linesize, AV_PIX_FMT_RGBA, anim->x, anim->y, 1);
+    const size_t src_linesize = anim->pFrameRGB->linesize[0];
+    BLI_assert_msg(anim->pFrameRGB->linesize[1] == src_linesize &&
+                       anim->pFrameRGB->linesize[2] == src_linesize &&
+                       anim->pFrameRGB->linesize[3] == src_linesize,
+                   "ffmpeg frame should be 4 same size planes for a floating point image case");
+    for (int y = 0; y < ibuf->y; y++) {
+      size_t src_offset = src_linesize * (ibuf->y - y - 1);
+      const float *src_g = reinterpret_cast<const float *>(anim->pFrameRGB->data[0] + src_offset);
+      const float *src_b = reinterpret_cast<const float *>(anim->pFrameRGB->data[1] + src_offset);
+      const float *src_r = reinterpret_cast<const float *>(anim->pFrameRGB->data[2] + src_offset);
+      const float *src_a = reinterpret_cast<const float *>(anim->pFrameRGB->data[3] + src_offset);
+      float *dst = ibuf->float_buffer.data + ibuf->x * y * 4;
+      for (int x = 0; x < ibuf->x; x++) {
+        *dst++ = *src_r++;
+        *dst++ = *src_g++;
+        *dst++ = *src_b++;
+        *dst++ = *src_a++;
+      }
+    }
+  }
+  else {
+    /* If final destination image layout matches that of decoded RGB frame (including
+     * any line padding done by ffmpeg for SIMD alignment), we can directly
+     * decode into that, doing the vertical flip in the same step. Otherwise have
+     * to do a separate flip. */
+    const int ibuf_linesize = ibuf->x * 4;
+    const int rgb_linesize = anim->pFrameRGB->linesize[0];
+    bool scale_to_ibuf = (rgb_linesize == ibuf_linesize);
+    /* swscale on arm64 before ffmpeg 6.0 (libswscale major version 7)
+     * could not handle negative line sizes. That has been fixed in all major
+     * ffmpeg releases in early 2023, but easier to just check for "below 7". */
+#  if (defined(__aarch64__) || defined(_M_ARM64)) && (LIBSWSCALE_VERSION_MAJOR < 7)
+    scale_to_ibuf = false;
+#  endif
+    uint8_t *rgb_data = anim->pFrameRGB->data[0];
+
+    if (scale_to_ibuf) {
+      /* Decode RGB and do vertical flip directly into destination image, by using negative
+       * line size. */
+      anim->pFrameRGB->linesize[0] = -ibuf_linesize;
+      anim->pFrameRGB->data[0] = ibuf->byte_buffer.data + (ibuf->y - 1) * ibuf_linesize;
+
+      BKE_ffmpeg_sws_scale_frame(anim->img_convert_ctx, anim->pFrameRGB, input);
+
+      anim->pFrameRGB->linesize[0] = rgb_linesize;
+      anim->pFrameRGB->data[0] = rgb_data;
+    }
+    else {
+      /* Decode, then do vertical flip into destination. */
+      BKE_ffmpeg_sws_scale_frame(anim->img_convert_ctx, anim->pFrameRGB, input);
+
+      /* Use negative line size to do vertical image flip. */
+      const int src_linesize[4] = {-rgb_linesize, 0, 0, 0};
+      const uint8_t *const src[4] = {
+          rgb_data + (anim->y - 1) * rgb_linesize, nullptr, nullptr, nullptr};
+      int dst_size = av_image_get_buffer_size(AVPixelFormat(anim->pFrameRGB->format),
+                                              anim->pFrameRGB->width,
+                                              anim->pFrameRGB->height,
+                                              1);
+      av_image_copy_to_buffer(ibuf->byte_buffer.data,
+                              dst_size,
+                              src,
+                              src_linesize,
+                              AVPixelFormat(anim->pFrameRGB->format),
+                              anim->x,
+                              anim->y,
+                              1);
+    }
  }

  if (filter_y) {
@@ -1136,11 +1180,17 @@ static ImBuf *ffmpeg_fetchibuf(ImBufAnim *anim, int position, IMB_Timecode_Type

  /* Allocate the storage explicitly to ensure the memory is aligned. */
  const size_t align = ffmpeg_get_buffer_alignment();
+  const size_t pixel_size = anim->is_float ? 16 : 4;
  uint8_t *buffer_data = static_cast<uint8_t *>(
-      MEM_mallocN_aligned(size_t(4) * anim->x * anim->y, align, "ffmpeg ibuf"));
-  IMB_assign_byte_buffer(cur_frame_final, buffer_data, IB_TAKE_OWNERSHIP);
-
-  cur_frame_final->byte_buffer.colorspace = colormanage_colorspace_get_named(anim->colorspace);
+      MEM_mallocN_aligned(pixel_size * anim->x * anim->y, align, "ffmpeg ibuf"));
+  if (anim->is_float) {
+    IMB_assign_float_buffer(cur_frame_final, (float *)buffer_data, IB_TAKE_OWNERSHIP);
+    cur_frame_final->float_buffer.colorspace = colormanage_colorspace_get_named(anim->colorspace);
+  }
+  else {
+    IMB_assign_byte_buffer(cur_frame_final, buffer_data, IB_TAKE_OWNERSHIP);
+    cur_frame_final->byte_buffer.colorspace = colormanage_colorspace_get_named(anim->colorspace);
+  }

  AVFrame *final_frame = ffmpeg_frame_by_pts_get(anim, pts_to_search);
  if (final_frame == nullptr) {
--- a/source/blender/makesrna/intern/rna_scene.cc
+++ b/source/blender/makesrna/intern/rna_scene.cc
@@ -1307,6 +1307,27 @@ static bool rna_RenderSettings_is_movie_format_get(PointerRNA *ptr)
  return BKE_imtype_is_movie(rd->im_format.imtype);
 }

+static int get_first_valid_depth(const int valid_depths)
+{
+  /* set first available depth */
+  const char depth_ls[] = {
+      R_IMF_CHAN_DEPTH_32,
+      R_IMF_CHAN_DEPTH_24,
+      R_IMF_CHAN_DEPTH_16,
+      R_IMF_CHAN_DEPTH_12,
+      R_IMF_CHAN_DEPTH_10,
+      R_IMF_CHAN_DEPTH_8,
+      R_IMF_CHAN_DEPTH_1,
+      0,
+  };
+  for (int i = 0; depth_ls[i]; i++) {
+    if (valid_depths & depth_ls[i]) {
+      return depth_ls[i];
+    }
+  }
+  return R_IMF_CHAN_DEPTH_8;
+}
+
 static void rna_ImageFormatSettings_file_format_set(PointerRNA *ptr, int value)
 {
  ImageFormatData *imf = (ImageFormatData *)ptr->data;
@@ -1330,25 +1351,7 @@ static void rna_ImageFormatSettings_file_format_set(PointerRNA *ptr, int value)
  {
    const int depth_ok = BKE_imtype_valid_depths(imf->imtype);
    if ((imf->depth & depth_ok) == 0) {
-      /* set first available depth */
-      char depth_ls[] = {
-          R_IMF_CHAN_DEPTH_32,
-          R_IMF_CHAN_DEPTH_24,
-          R_IMF_CHAN_DEPTH_16,
-          R_IMF_CHAN_DEPTH_12,
-          R_IMF_CHAN_DEPTH_10,
-          R_IMF_CHAN_DEPTH_8,
-          R_IMF_CHAN_DEPTH_1,
-          0,
-      };
-      int i;
-
-      for (i = 0; depth_ls[i]; i++) {
-        if (depth_ok & depth_ls[i]) {
-          imf->depth = depth_ls[i];
-          break;
-        }
-      }
+      imf->depth = get_first_valid_depth(depth_ok);
    }
  }

@@ -1444,7 +1447,7 @@ static const EnumPropertyItem *rna_ImageFormatSettings_color_depth_itemf(bContex
    return rna_enum_image_color_depth_items;
  }
  else {
-    const int depth_ok = BKE_imtype_valid_depths(imf->imtype);
+    const int depth_ok = BKE_imtype_valid_depths_with_video(imf->imtype, ptr->owner_id);
    const int is_float = ELEM(
        imf->imtype, R_IMF_IMTYPE_RADHDR, R_IMF_IMTYPE_OPENEXR, R_IMF_IMTYPE_MULTILAYER);

@@ -2958,6 +2961,17 @@ static void rna_FFmpegSettings_codec_update(Main * /*bmain*/, Scene * /*scene*/,
     * mode to CBR for others. */
    codec_data->constant_rate_factor = FFM_CRF_NONE;
  }
+
+  /* Ensure valid color depth when changing the codec. */
+  const ID *id = ptr->owner_id;
+  const bool is_render = (id && GS(id->name) == ID_SCE);
+  if (is_render) {
+    Scene *scene = (Scene *)ptr->owner_id;
+    const int valid_depths = BKE_imtype_valid_depths_with_video(scene->r.im_format.imtype, id);
+    if ((scene->r.im_format.depth & valid_depths) == 0) {
+      scene->r.im_format.depth = get_first_valid_depth(valid_depths);
+    }
+  }
 }
 #  endif