From c989add4cd197762b2b4860578804eca413796af Mon Sep 17 00:00:00 2001
From: Kace <lakacey03@gmail.com>
Date: Fri, 3 Oct 2025 09:42:48 +0200
Subject: [PATCH] VSE: Sound pitch correction

Add sound pitch correction functionality, so that retimed sound strips
can preserve their original pitch.

This has been implemented as a GSoC 2025 project. The actual pitch
correction is done by the Rubber Band library, which is already
included in the Blender 5.0 library builds; most of the remaining
code lives in Audaspace, which was already updated within the
Blender tree earlier.

So this PR just turns on the Rubber Band build option and adds a
checkbox for pitch correction to VSE sound strips (on by default for
newly created sound strips). Pitch correction works with both
simple whole-strip retiming and more complex retiming setups where
different parts of the strip use different speeds.

Co-authored-by: Aras Pranckevicius <aras@nesnausk.org>
Pull Request: https://projects.blender.org/blender/blender/pulls/143347
---
 CMakeLists.txt                                |  2 +-
 build_files/cmake/config/blender_full.cmake   |  1 +
 build_files/cmake/config/blender_lite.cmake   |  2 +-
 .../cmake/config/blender_release.cmake        |  1 +
 scripts/startup/bl_ui/space_sequencer.py      |  4 ++
 source/blender/blenkernel/BKE_sound.h         | 12 +++++
 source/blender/blenkernel/CMakeLists.txt      |  4 ++
 source/blender/blenkernel/intern/sound.cc     | 48 +++++++++++++++++
 source/blender/makesdna/DNA_sequence_types.h  |  2 +-
 .../blender/makesrna/intern/rna_sequencer.cc  |  7 +++
 source/blender/sequencer/CMakeLists.txt       |  3 ++
 source/blender/sequencer/intern/strip_add.cc  |  3 ++
 .../sequencer/intern/strip_retiming.cc        | 51 ++++++++++++++++---
 13 files changed, 129 insertions(+), 11 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index be11c314cd2..fab9b3d0320 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -420,7 +420,7 @@ mark_as_advanced(WITH_SYSTEM_AUDASPACE)
 if(WITH_AUDASPACE AND NOT WITH_SYSTEM_AUDASPACE)
   option(WITH_RUBBERBAND "\
   Build with Rubber Band for audio time-stretching and pitch-scaling (used by Audaspace)" 
-    OFF
+    ON
   )
 endif()
 
diff --git a/build_files/cmake/config/blender_full.cmake b/build_files/cmake/config/blender_full.cmake
index 39c6b711c33..a84aad23b88 100644
--- a/build_files/cmake/config/blender_full.cmake
+++ b/build_files/cmake/config/blender_full.cmake
@@ -49,6 +49,7 @@ set(WITH_POTRACE             ON  CACHE BOOL "" FORCE)
 set(WITH_PUGIXML             ON  CACHE BOOL "" FORCE)
 set(WITH_PYTHON_INSTALL      ON  CACHE BOOL "" FORCE)
 set(WITH_QUADRIFLOW          ON  CACHE BOOL "" FORCE)
+set(WITH_RUBBERBAND          ON  CACHE BOOL "" FORCE)
 set(WITH_SDL                 OFF CACHE BOOL "" FORCE)
 set(WITH_TBB                 ON  CACHE BOOL "" FORCE)
 set(WITH_USD                 ON  CACHE BOOL "" FORCE)
diff --git a/build_files/cmake/config/blender_lite.cmake b/build_files/cmake/config/blender_lite.cmake
index d07c32d00ad..419b81d66c0 100644
--- a/build_files/cmake/config/blender_lite.cmake
+++ b/build_files/cmake/config/blender_lite.cmake
@@ -52,12 +52,12 @@ set(WITH_OPENCOLORIO         OFF CACHE BOOL "" FORCE)
 set(WITH_OPENIMAGEDENOISE    OFF CACHE BOOL "" FORCE)
 set(WITH_OPENSUBDIV          OFF CACHE BOOL "" FORCE)
 set(WITH_OPENVDB             OFF CACHE BOOL "" FORCE)
-
 set(WITH_POTRACE             OFF CACHE BOOL "" FORCE)
 set(WITH_PUGIXML             OFF CACHE BOOL "" FORCE)
 set(WITH_PULSEAUDIO          OFF CACHE BOOL "" FORCE)
 set(WITH_PIPEWIRE            OFF CACHE BOOL "" FORCE)
 set(WITH_QUADRIFLOW          OFF CACHE BOOL "" FORCE)
+set(WITH_RUBBERBAND          OFF CACHE BOOL "" FORCE)
 set(WITH_SDL                 OFF CACHE BOOL "" FORCE)
 set(WITH_TBB                 OFF CACHE BOOL "" FORCE)
 set(WITH_USD                 OFF CACHE BOOL "" FORCE)
diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake
index f5029e25f2c..6cc087c65e6 100644
--- a/build_files/cmake/config/blender_release.cmake
+++ b/build_files/cmake/config/blender_release.cmake
@@ -52,6 +52,7 @@ set(WITH_OPENVDB_BLOSC       ON  CACHE BOOL "" FORCE)
 set(WITH_POTRACE             ON  CACHE BOOL "" FORCE)
 set(WITH_PUGIXML             ON  CACHE BOOL "" FORCE)
 set(WITH_PYTHON_INSTALL      ON  CACHE BOOL "" FORCE)
+set(WITH_RUBBERBAND          ON  CACHE BOOL "" FORCE)
 set(WITH_QUADRIFLOW          ON  CACHE BOOL "" FORCE)
 set(WITH_SDL                 OFF CACHE BOOL "" FORCE)
 set(WITH_TBB                 ON  CACHE BOOL "" FORCE)
diff --git a/scripts/startup/bl_ui/space_sequencer.py b/scripts/startup/bl_ui/space_sequencer.py
index 0d56a5f73f3..8ea667b06ec 100644
--- a/scripts/startup/bl_ui/space_sequencer.py
+++ b/scripts/startup/bl_ui/space_sequencer.py
@@ -2502,6 +2502,10 @@ class SEQUENCER_PT_adjust_sound(SequencerButtonsPanel, Panel):
             layout.use_property_split = False
             col = layout.column()
 
+            split = col.split(factor=0.4)
+            split.label(text="")
+            split.prop(strip, "pitch_correction")
+
             if overlay_settings.waveform_display_type == 'DEFAULT_WAVEFORMS':
                 split = col.split(factor=0.4)
                 split.label(text="")
diff --git a/source/blender/blenkernel/BKE_sound.h b/source/blender/blenkernel/BKE_sound.h
index 4c7a622bc4c..d791c6f7e9d 100644
--- a/source/blender/blenkernel/BKE_sound.h
+++ b/source/blender/blenkernel/BKE_sound.h
@@ -170,6 +170,16 @@ void BKE_sound_set_scene_sound_pitch_constant_range(void *handle,
                                                     int frame_end,
                                                     float pitch);
 
+void BKE_sound_set_scene_sound_time_stretch_at_frame(void *handle,
+                                                     int frame,
+                                                     float time_stretch,
+                                                     char animated);
+
+void BKE_sound_set_scene_sound_time_stretch_constant_range(void *handle,
+                                                           int frame_start,
+                                                           int frame_end,
+                                                           float time_stretch);
+
 void BKE_sound_set_scene_sound_pan_at_frame(void *handle, int frame, float pan, char animated);
 
 void BKE_sound_update_sequencer(struct Main *main, struct bSound *sound);
@@ -204,3 +214,5 @@ void BKE_sound_jack_scene_update(struct Scene *scene, int mode, double time);
 struct Depsgraph;
 
 void BKE_sound_evaluate(struct Depsgraph *depsgraph, struct Main *bmain, struct bSound *sound);
+
+void *BKE_sound_add_time_stretch_effect(void *sound_handle, float fps);
diff --git a/source/blender/blenkernel/CMakeLists.txt b/source/blender/blenkernel/CMakeLists.txt
index 652af171d5e..bd1ab984e9c 100644
--- a/source/blender/blenkernel/CMakeLists.txt
+++ b/source/blender/blenkernel/CMakeLists.txt
@@ -628,6 +628,10 @@ if(WITH_AUDASPACE)
     )
   endif()
   add_definitions(-DWITH_AUDASPACE)
+  
+  if(WITH_RUBBERBAND)
+    add_definitions(-DWITH_RUBBERBAND)
+  endif()
 endif()
 
 if(WITH_BULLET)
diff --git a/source/blender/blenkernel/intern/sound.cc b/source/blender/blenkernel/intern/sound.cc
index db2d32a67fc..d25fcd806ff 100644
--- a/source/blender/blenkernel/intern/sound.cc
+++ b/source/blender/blenkernel/intern/sound.cc
@@ -45,6 +45,7 @@
 #  include <AUD_Sequence.h>
 #  include <AUD_Sound.h>
 #  include <AUD_Special.h>
+#  include <AUD_Types.h>
 #endif
 
 #include "BKE_bpath.hh"
@@ -1528,6 +1529,32 @@ bool BKE_sound_stream_info_get(Main *main,
   return true;
 }
 
+#  ifdef WITH_RUBBERBAND
+void *BKE_sound_add_time_stretch_effect(void *sound_handle, float fps)
+{
+  return AUD_Sound_animateableTimeStretchPitchScale(
+      sound_handle, fps, 1.0, 1.0, AUD_STRETCHER_QUALITY_HIGH, false);
+}
+void BKE_sound_set_scene_sound_time_stretch_at_frame(void *handle,
+                                                     int frame,
+                                                     float time_stretch,
+                                                     char animated)
+{
+  AUD_Sound_animateableTimeStretchPitchScale_setAnimationData(
+      handle, AUD_AP_TIME_STRETCH, frame, &time_stretch, animated);
+}
+void BKE_sound_set_scene_sound_time_stretch_constant_range(void *handle,
+                                                           int frame_start,
+                                                           int frame_end,
+                                                           float time_stretch)
+{
+  frame_start = max_ii(0, frame_start);
+  frame_end = max_ii(0, frame_end);
+  AUD_Sound_animateableTimeStretchPitchScale_setConstantRangeAnimationData(
+      handle, AUD_AP_TIME_STRETCH, frame_start, frame_end, &time_stretch);
+}
+#  endif /* WITH_RUBBERBAND */
+
 #else /* WITH_AUDASPACE */
 
 #  include "BLI_utildefines.h"
@@ -1596,6 +1623,7 @@ void BKE_sound_read_waveform(Main *bmain,
 {
   UNUSED_VARS(sound, stop, bmain);
 }
+
 void BKE_sound_update_sequencer(Main * /*main*/, bSound * /*sound*/) {}
 void BKE_sound_update_scene(Depsgraph * /*depsgraph*/, Scene * /*scene*/) {}
 void BKE_sound_update_scene_sound(void * /*handle*/, bSound * /*sound*/) {}
@@ -1653,6 +1681,26 @@ bool BKE_sound_stream_info_get(Main * /*main*/,
 
 #endif /* WITH_AUDASPACE */
 
+#if !defined(WITH_AUDASPACE) || !defined(WITH_RUBBERBAND)
+void *BKE_sound_add_time_stretch_effect(void * /*sound_handle*/, float /*fps*/)
+{
+  return nullptr;
+}
+
+void BKE_sound_set_scene_sound_time_stretch_at_frame(void * /*handle*/,
+                                                     int /*frame*/,
+                                                     float /*time_stretch*/,
+                                                     char /*animated*/)
+{
+}
+void BKE_sound_set_scene_sound_time_stretch_constant_range(void * /*handle*/,
+                                                           int /*frame_start*/,
+                                                           int /*frame_end*/,
+                                                           float /*time_stretch*/)
+{
+}
+#endif
+
 void BKE_sound_reset_scene_runtime(Scene *scene)
 {
   scene->sound_scene = nullptr;
diff --git a/source/blender/makesdna/DNA_sequence_types.h b/source/blender/makesdna/DNA_sequence_types.h
index 27c593d1456..c4afcc3dd95 100644
--- a/source/blender/makesdna/DNA_sequence_types.h
+++ b/source/blender/makesdna/DNA_sequence_types.h
@@ -748,7 +748,7 @@ typedef enum eStripFlag {
   /* Access scene strips directly (like a meta-strip). */
   SEQ_SCENE_STRIPS = (1 << 30),
 
-  SEQ_UNUSED_31 = (1u << 31),
+  SEQ_AUDIO_PITCH_CORRECTION = (1u << 31)
 } eStripFlag;
 
 /** #StripProxy.storage */
diff --git a/source/blender/makesrna/intern/rna_sequencer.cc b/source/blender/makesrna/intern/rna_sequencer.cc
index 3a7dc0b899e..312b2b74a77 100644
--- a/source/blender/makesrna/intern/rna_sequencer.cc
+++ b/source/blender/makesrna/intern/rna_sequencer.cc
@@ -3230,6 +3230,13 @@ static void rna_def_sound(BlenderRNA *brna)
       prop, "Display Waveform", "Display the audio waveform inside the strip");
   RNA_def_property_update(prop, NC_SCENE | ND_SEQUENCER, nullptr);
 
+  prop = RNA_def_property(srna, "pitch_correction", PROP_BOOLEAN, PROP_NONE);
+  RNA_def_property_boolean_sdna(prop, nullptr, "flag", SEQ_AUDIO_PITCH_CORRECTION);
+  RNA_def_property_ui_text(
+      prop,
+      "Preserve Pitch",
+      "Maintain the original pitch of the audio when changing playback speed");
+  RNA_def_property_update(prop, NC_SCENE | ND_SEQUENCER, "rna_Strip_sound_update");
   rna_def_retiming_keys(srna);
   rna_def_input(srna);
 }
diff --git a/source/blender/sequencer/CMakeLists.txt b/source/blender/sequencer/CMakeLists.txt
index bf287af3bc9..7d0a9958f9f 100644
--- a/source/blender/sequencer/CMakeLists.txt
+++ b/source/blender/sequencer/CMakeLists.txt
@@ -144,6 +144,9 @@ if(WITH_AUDASPACE)
   if(WITH_FFTW3)
     add_definitions(-DWITH_CONVOLUTION)
   endif()
+  if(WITH_RUBBERBAND)
+    add_definitions(-DWITH_RUBBERBAND)
+  endif()
 endif()
 
 blender_add_lib(bf_sequencer "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
diff --git a/source/blender/sequencer/intern/strip_add.cc b/source/blender/sequencer/intern/strip_add.cc
index 5c020fd184a..d870fa62229 100644
--- a/source/blender/sequencer/intern/strip_add.cc
+++ b/source/blender/sequencer/intern/strip_add.cc
@@ -350,6 +350,9 @@ Strip *add_sound_strip(Main *bmain, Scene *scene, ListBase *seqbase, LoadData *l
 
     /* Turn on Display Waveform by default. */
     strip->flag |= SEQ_AUDIO_DRAW_WAVEFORM;
+
+    /* Turn on Preserve Pitch by default. */
+    strip->flag |= SEQ_AUDIO_PITCH_CORRECTION;
   }
 
   strip_add_set_name(scene, strip, load_data);
diff --git a/source/blender/sequencer/intern/strip_retiming.cc b/source/blender/sequencer/intern/strip_retiming.cc
index aa07bd4aa0a..7c987ed3a2c 100644
--- a/source/blender/sequencer/intern/strip_retiming.cc
+++ b/source/blender/sequencer/intern/strip_retiming.cc
@@ -12,6 +12,7 @@
 
 #include "DNA_scene_types.h"
 #include "DNA_sequence_types.h"
+#include "DNA_sound_types.h"
 
 #include "BLI_listbase.h"
 #include "BLI_map.hh"
@@ -1047,6 +1048,27 @@ static RetimingRangeData strip_retiming_range_data_get(const Scene *scene, const
 
 void retiming_sound_animation_data_set(const Scene *scene, const Strip *strip)
 {
+
+  RetimingRangeData retiming_data = strip_retiming_range_data_get(scene, strip);
+
+  /* No need to apply the time-stretch effect if all the retiming range speeds are 1: the
+   * effect itself is expensive while the audio is playing, so we want to avoid using it
+   * whenever we can. */
+  bool correct_pitch = (strip->flag & SEQ_AUDIO_PITCH_CORRECTION) && strip->sound != nullptr &&
+                       std::any_of(retiming_data.ranges.begin(),
+                                   retiming_data.ranges.end(),
+                                   [](const RetimingRange &range) {
+                                     return range.type != TRANSITION && range.speed != 1.0;
+                                   });
+
+  void *sound_handle = strip->sound ? strip->sound->playback_handle : nullptr;
+  const float scene_fps = float(scene->r.frs_sec) / float(scene->r.frs_sec_base);
+  if (correct_pitch) {
+    sound_handle = BKE_sound_add_time_stretch_effect(sound_handle, scene_fps);
+    BKE_sound_set_scene_sound_pitch_constant_range(
+        strip->scene_sound, 0, strip->start + strip->len, 1.0f);
+  }
+
   /* Content cut off by `anim_startofs` is as if it does not exist for sequencer. But Audaspace
    * seeking relies on having animation buffer initialized for whole sequence. */
   if (strip->anim_startofs > 0) {
@@ -1055,26 +1077,39 @@ void retiming_sound_animation_data_set(const Scene *scene, const Strip *strip)
         strip->scene_sound, strip_start - strip->anim_startofs, strip_start, 1.0f);
   }
 
-  const float scene_fps = float(scene->r.frs_sec) / float(scene->r.frs_sec_base);
   const int sound_offset = time_get_rounded_sound_offset(strip, scene_fps);
 
-  RetimingRangeData retiming_data = strip_retiming_range_data_get(scene, strip);
   for (int i = 0; i < retiming_data.ranges.size(); i++) {
-    RetimingRange range = retiming_data.ranges[i];
+    const RetimingRange &range = retiming_data.ranges[i];
     if (range.type == TRANSITION) {
-
       const int range_length = range.end - range.start;
       for (int i = 0; i <= range_length; i++) {
         const int frame = range.start + i;
-        BKE_sound_set_scene_sound_pitch_at_frame(
-            strip->scene_sound, frame + sound_offset, range.speed_table[i], true);
+        if (correct_pitch) {
+          BKE_sound_set_scene_sound_time_stretch_at_frame(
+              sound_handle, frame - strip->start, 1.0 / range.speed_table[i], true);
+        }
+        else {
+          BKE_sound_set_scene_sound_pitch_at_frame(
+              strip->scene_sound, frame + sound_offset, range.speed_table[i], true);
+        }
       }
     }
     else {
-      BKE_sound_set_scene_sound_pitch_constant_range(
-          strip->scene_sound, range.start + sound_offset, range.end + sound_offset, range.speed);
+      if (correct_pitch) {
+        BKE_sound_set_scene_sound_time_stretch_constant_range(
+            sound_handle, range.start - strip->start, range.end - strip->start, 1.0 / range.speed);
+      }
+      else {
+        BKE_sound_set_scene_sound_pitch_constant_range(
+            strip->scene_sound, range.start + sound_offset, range.end + sound_offset, range.speed);
+      }
     }
   }
+
+  if (correct_pitch) {
+    BKE_sound_update_sequence_handle(strip->scene_sound, sound_handle);
+  }
 }
 
 bool retiming_selection_clear(const Editing *ed)