From c989add4cd197762b2b4860578804eca413796af Mon Sep 17 00:00:00 2001 From: Kace Date: Fri, 3 Oct 2025 09:42:48 +0200 Subject: [PATCH] VSE: Sound pitch correction Add sound pitch correction functionality, so that retimed sound strips can preserve their original pitch. This has been implemented as a GSoC 2025 project. The actual pitch correction is done by the Rubber Band library, which is already included in the Blender 5.0 library builds; most of the remaining code lives in Audaspace, which was already updated within the Blender tree earlier. So this PR just turns on the Rubber Band build option and adds a pitch correction checkbox to VSE sound strips (on by default for newly created sound strips). Pitch correction works with both simple whole-strip retiming and more complex retiming setups where different parts of the strip use different speeds. Co-authored-by: Aras Pranckevicius Pull Request: https://projects.blender.org/blender/blender/pulls/143347 --- CMakeLists.txt | 2 +- build_files/cmake/config/blender_full.cmake | 1 + build_files/cmake/config/blender_lite.cmake | 2 +- .../cmake/config/blender_release.cmake | 1 + scripts/startup/bl_ui/space_sequencer.py | 4 ++ source/blender/blenkernel/BKE_sound.h | 12 +++++ source/blender/blenkernel/CMakeLists.txt | 4 ++ source/blender/blenkernel/intern/sound.cc | 48 +++++++++++++++++ source/blender/makesdna/DNA_sequence_types.h | 2 +- .../blender/makesrna/intern/rna_sequencer.cc | 7 +++ source/blender/sequencer/CMakeLists.txt | 3 ++ source/blender/sequencer/intern/strip_add.cc | 3 ++ .../sequencer/intern/strip_retiming.cc | 51 ++++++++++++++++--- 13 files changed, 129 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index be11c314cd2..fab9b3d0320 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -420,7 +420,7 @@ mark_as_advanced(WITH_SYSTEM_AUDASPACE) if(WITH_AUDASPACE AND NOT WITH_SYSTEM_AUDASPACE) option(WITH_RUBBERBAND "\ Build with Rubber Band for audio time-stretching and 
pitch-scaling (used by Audaspace)" - OFF + ON ) endif() diff --git a/build_files/cmake/config/blender_full.cmake b/build_files/cmake/config/blender_full.cmake index 39c6b711c33..a84aad23b88 100644 --- a/build_files/cmake/config/blender_full.cmake +++ b/build_files/cmake/config/blender_full.cmake @@ -49,6 +49,7 @@ set(WITH_POTRACE ON CACHE BOOL "" FORCE) set(WITH_PUGIXML ON CACHE BOOL "" FORCE) set(WITH_PYTHON_INSTALL ON CACHE BOOL "" FORCE) set(WITH_QUADRIFLOW ON CACHE BOOL "" FORCE) +set(WITH_RUBBERBAND ON CACHE BOOL "" FORCE) set(WITH_SDL OFF CACHE BOOL "" FORCE) set(WITH_TBB ON CACHE BOOL "" FORCE) set(WITH_USD ON CACHE BOOL "" FORCE) diff --git a/build_files/cmake/config/blender_lite.cmake b/build_files/cmake/config/blender_lite.cmake index d07c32d00ad..419b81d66c0 100644 --- a/build_files/cmake/config/blender_lite.cmake +++ b/build_files/cmake/config/blender_lite.cmake @@ -52,12 +52,12 @@ set(WITH_OPENCOLORIO OFF CACHE BOOL "" FORCE) set(WITH_OPENIMAGEDENOISE OFF CACHE BOOL "" FORCE) set(WITH_OPENSUBDIV OFF CACHE BOOL "" FORCE) set(WITH_OPENVDB OFF CACHE BOOL "" FORCE) - set(WITH_POTRACE OFF CACHE BOOL "" FORCE) set(WITH_PUGIXML OFF CACHE BOOL "" FORCE) set(WITH_PULSEAUDIO OFF CACHE BOOL "" FORCE) set(WITH_PIPEWIRE OFF CACHE BOOL "" FORCE) set(WITH_QUADRIFLOW OFF CACHE BOOL "" FORCE) +set(WITH_RUBBERBAND OFF CACHE BOOL "" FORCE) set(WITH_SDL OFF CACHE BOOL "" FORCE) set(WITH_TBB OFF CACHE BOOL "" FORCE) set(WITH_USD OFF CACHE BOOL "" FORCE) diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake index f5029e25f2c..6cc087c65e6 100644 --- a/build_files/cmake/config/blender_release.cmake +++ b/build_files/cmake/config/blender_release.cmake @@ -52,6 +52,7 @@ set(WITH_OPENVDB_BLOSC ON CACHE BOOL "" FORCE) set(WITH_POTRACE ON CACHE BOOL "" FORCE) set(WITH_PUGIXML ON CACHE BOOL "" FORCE) set(WITH_PYTHON_INSTALL ON CACHE BOOL "" FORCE) +set(WITH_RUBBERBAND ON CACHE BOOL "" FORCE) set(WITH_QUADRIFLOW ON CACHE BOOL "" 
FORCE) set(WITH_SDL OFF CACHE BOOL "" FORCE) set(WITH_TBB ON CACHE BOOL "" FORCE) diff --git a/scripts/startup/bl_ui/space_sequencer.py b/scripts/startup/bl_ui/space_sequencer.py index 0d56a5f73f3..8ea667b06ec 100644 --- a/scripts/startup/bl_ui/space_sequencer.py +++ b/scripts/startup/bl_ui/space_sequencer.py @@ -2502,6 +2502,10 @@ class SEQUENCER_PT_adjust_sound(SequencerButtonsPanel, Panel): layout.use_property_split = False col = layout.column() + split = col.split(factor=0.4) + split.label(text="") + split.prop(strip, "pitch_correction") + if overlay_settings.waveform_display_type == 'DEFAULT_WAVEFORMS': split = col.split(factor=0.4) split.label(text="") diff --git a/source/blender/blenkernel/BKE_sound.h b/source/blender/blenkernel/BKE_sound.h index 4c7a622bc4c..d791c6f7e9d 100644 --- a/source/blender/blenkernel/BKE_sound.h +++ b/source/blender/blenkernel/BKE_sound.h @@ -170,6 +170,16 @@ void BKE_sound_set_scene_sound_pitch_constant_range(void *handle, int frame_end, float pitch); +void BKE_sound_set_scene_sound_time_stretch_at_frame(void *handle, + int frame, + float time_stretch, + char animated); + +void BKE_sound_set_scene_sound_time_stretch_constant_range(void *handle, + int frame_start, + int frame_end, + float time_stretch); + void BKE_sound_set_scene_sound_pan_at_frame(void *handle, int frame, float pan, char animated); void BKE_sound_update_sequencer(struct Main *main, struct bSound *sound); @@ -204,3 +214,5 @@ void BKE_sound_jack_scene_update(struct Scene *scene, int mode, double time); struct Depsgraph; void BKE_sound_evaluate(struct Depsgraph *depsgraph, struct Main *bmain, struct bSound *sound); + +void *BKE_sound_add_time_stretch_effect(void *sound_handle, float fps); diff --git a/source/blender/blenkernel/CMakeLists.txt b/source/blender/blenkernel/CMakeLists.txt index 652af171d5e..bd1ab984e9c 100644 --- a/source/blender/blenkernel/CMakeLists.txt +++ b/source/blender/blenkernel/CMakeLists.txt @@ -628,6 +628,10 @@ if(WITH_AUDASPACE) ) endif() 
add_definitions(-DWITH_AUDASPACE) + + if(WITH_RUBBERBAND) + add_definitions(-DWITH_RUBBERBAND) + endif() endif() if(WITH_BULLET) diff --git a/source/blender/blenkernel/intern/sound.cc b/source/blender/blenkernel/intern/sound.cc index db2d32a67fc..d25fcd806ff 100644 --- a/source/blender/blenkernel/intern/sound.cc +++ b/source/blender/blenkernel/intern/sound.cc @@ -45,6 +45,7 @@ # include # include # include +# include #endif #include "BKE_bpath.hh" @@ -1528,6 +1529,32 @@ bool BKE_sound_stream_info_get(Main *main, return true; } +# ifdef WITH_RUBBERBAND +void *BKE_sound_add_time_stretch_effect(void *sound_handle, float fps) +{ + return AUD_Sound_animateableTimeStretchPitchScale( + sound_handle, fps, 1.0, 1.0, AUD_STRETCHER_QUALITY_HIGH, false); +} +void BKE_sound_set_scene_sound_time_stretch_at_frame(void *handle, + int frame, + float time_stretch, + char animated) +{ + AUD_Sound_animateableTimeStretchPitchScale_setAnimationData( + handle, AUD_AP_TIME_STRETCH, frame, &time_stretch, animated); +} +void BKE_sound_set_scene_sound_time_stretch_constant_range(void *handle, + int frame_start, + int frame_end, + float time_stretch) +{ + frame_start = max_ii(0, frame_start); + frame_end = max_ii(0, frame_end); + AUD_Sound_animateableTimeStretchPitchScale_setConstantRangeAnimationData( + handle, AUD_AP_TIME_STRETCH, frame_start, frame_end, &time_stretch); +} +# endif /* WITH_RUBBERBAND */ + #else /* WITH_AUDASPACE */ # include "BLI_utildefines.h" @@ -1596,6 +1623,7 @@ void BKE_sound_read_waveform(Main *bmain, { UNUSED_VARS(sound, stop, bmain); } + void BKE_sound_update_sequencer(Main * /*main*/, bSound * /*sound*/) {} void BKE_sound_update_scene(Depsgraph * /*depsgraph*/, Scene * /*scene*/) {} void BKE_sound_update_scene_sound(void * /*handle*/, bSound * /*sound*/) {} @@ -1653,6 +1681,26 @@ bool BKE_sound_stream_info_get(Main * /*main*/, #endif /* WITH_AUDASPACE */ +#if !defined(WITH_AUDASPACE) || !defined(WITH_RUBBERBAND) +void *BKE_sound_add_time_stretch_effect(void * 
/*sound_handle*/, float /*fps*/) +{ + return nullptr; +} + +void BKE_sound_set_scene_sound_time_stretch_at_frame(void * /*handle*/, + int /*frame*/, + float /*time_stretch*/, + char /*animated*/) +{ +} +void BKE_sound_set_scene_sound_time_stretch_constant_range(void * /*handle*/, + int /*frame_start*/, + int /*frame_end*/, + float /*time_stretch*/) +{ +} +#endif + void BKE_sound_reset_scene_runtime(Scene *scene) { scene->sound_scene = nullptr; diff --git a/source/blender/makesdna/DNA_sequence_types.h b/source/blender/makesdna/DNA_sequence_types.h index 27c593d1456..c4afcc3dd95 100644 --- a/source/blender/makesdna/DNA_sequence_types.h +++ b/source/blender/makesdna/DNA_sequence_types.h @@ -748,7 +748,7 @@ typedef enum eStripFlag { /* Access scene strips directly (like a meta-strip). */ SEQ_SCENE_STRIPS = (1 << 30), - SEQ_UNUSED_31 = (1u << 31), + SEQ_AUDIO_PITCH_CORRECTION = (1u << 31) } eStripFlag; /** #StripProxy.storage */ diff --git a/source/blender/makesrna/intern/rna_sequencer.cc b/source/blender/makesrna/intern/rna_sequencer.cc index 3a7dc0b899e..312b2b74a77 100644 --- a/source/blender/makesrna/intern/rna_sequencer.cc +++ b/source/blender/makesrna/intern/rna_sequencer.cc @@ -3230,6 +3230,13 @@ static void rna_def_sound(BlenderRNA *brna) prop, "Display Waveform", "Display the audio waveform inside the strip"); RNA_def_property_update(prop, NC_SCENE | ND_SEQUENCER, nullptr); + prop = RNA_def_property(srna, "pitch_correction", PROP_BOOLEAN, PROP_NONE); + RNA_def_property_boolean_sdna(prop, nullptr, "flag", SEQ_AUDIO_PITCH_CORRECTION); + RNA_def_property_ui_text( + prop, + "Preserve Pitch", + "Maintain the original pitch of the audio when changing playback speed"); + RNA_def_property_update(prop, NC_SCENE | ND_SEQUENCER, "rna_Strip_sound_update"); rna_def_retiming_keys(srna); rna_def_input(srna); } diff --git a/source/blender/sequencer/CMakeLists.txt b/source/blender/sequencer/CMakeLists.txt index bf287af3bc9..7d0a9958f9f 100644 --- 
a/source/blender/sequencer/CMakeLists.txt
+++ b/source/blender/sequencer/CMakeLists.txt
@@ -144,6 +144,9 @@ if(WITH_AUDASPACE)
   if(WITH_FFTW3)
     add_definitions(-DWITH_CONVOLUTION)
   endif()
+  if(WITH_RUBBERBAND)
+    add_definitions(-DWITH_RUBBERBAND)
+  endif()
 endif()
 
 blender_add_lib(bf_sequencer "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
diff --git a/source/blender/sequencer/intern/strip_add.cc b/source/blender/sequencer/intern/strip_add.cc
index 5c020fd184a..d870fa62229 100644
--- a/source/blender/sequencer/intern/strip_add.cc
+++ b/source/blender/sequencer/intern/strip_add.cc
@@ -350,6 +350,9 @@ Strip *add_sound_strip(Main *bmain, Scene *scene, ListBase *seqbase, LoadData *l
 
     /* Turn on Display Waveform by default. */
     strip->flag |= SEQ_AUDIO_DRAW_WAVEFORM;
+
+    /* Turn on Preserve Pitch by default. */
+    strip->flag |= SEQ_AUDIO_PITCH_CORRECTION;
   }
 
   strip_add_set_name(scene, strip, load_data);
diff --git a/source/blender/sequencer/intern/strip_retiming.cc b/source/blender/sequencer/intern/strip_retiming.cc
index aa07bd4aa0a..7c987ed3a2c 100644
--- a/source/blender/sequencer/intern/strip_retiming.cc
+++ b/source/blender/sequencer/intern/strip_retiming.cc
@@ -12,6 +12,7 @@
 
 #include "DNA_scene_types.h"
 #include "DNA_sequence_types.h"
+#include "DNA_sound_types.h"
 
 #include "BLI_listbase.h"
 #include "BLI_map.hh"
@@ -1047,6 +1048,39 @@ static RetimingRangeData strip_retiming_range_data_get(const Scene *scene, const
 
 void retiming_sound_animation_data_set(const Scene *scene, const Strip *strip)
 {
+
+  RetimingRangeData retiming_data = strip_retiming_range_data_get(scene, strip);
+
+  /* No need to apply the time-stretch effect if all the retiming range speeds are 1, as the
+   * effect itself is still expensive while the audio is playing and we want to avoid using it
+   * whenever we can. */
+  bool correct_pitch = (strip->flag & SEQ_AUDIO_PITCH_CORRECTION) && strip->sound != nullptr &&
+                       std::any_of(retiming_data.ranges.begin(),
+                                   retiming_data.ranges.end(),
+                                   [](const RetimingRange &range) {
+                                     return range.type != TRANSITION && range.speed != 1.0;
+                                   });
+
+  void *sound_handle = strip->sound ? strip->sound->playback_handle : nullptr;
+  const float scene_fps = float(scene->r.frs_sec) / float(scene->r.frs_sec_base);
+  if (correct_pitch) {
+    /* Only time-stretch when an effect handle was actually created: the playback handle may be
+     * null, and #BKE_sound_add_time_stretch_effect returns null in builds without Rubber Band.
+     * Otherwise fall back to pitch-based retiming instead of assigning a null sound handle. */
+    void *stretch_handle = nullptr;
+    if (sound_handle != nullptr) {
+      stretch_handle = BKE_sound_add_time_stretch_effect(sound_handle, scene_fps);
+    }
+    if (stretch_handle == nullptr) {
+      correct_pitch = false;
+    }
+    else {
+      sound_handle = stretch_handle;
+      BKE_sound_set_scene_sound_pitch_constant_range(
+          strip->scene_sound, 0, strip->start + strip->len, 1.0f);
+    }
+  }
+
   /* Content cut off by `anim_startofs` is as if it does not exist for sequencer. But Audaspace
    * seeking relies on having animation buffer initialized for whole sequence. */
   if (strip->anim_startofs > 0) {
@@ -1055,26 +1089,39 @@ void retiming_sound_animation_data_set(const Scene *scene, const Strip *strip)
         strip->scene_sound, strip_start - strip->anim_startofs, strip_start, 1.0f);
   }
 
-  const float scene_fps = float(scene->r.frs_sec) / float(scene->r.frs_sec_base);
   const int sound_offset = time_get_rounded_sound_offset(strip, scene_fps);
 
-  RetimingRangeData retiming_data = strip_retiming_range_data_get(scene, strip);
   for (int i = 0; i < retiming_data.ranges.size(); i++) {
-    RetimingRange range = retiming_data.ranges[i];
+    const RetimingRange &range = retiming_data.ranges[i];
     if (range.type == TRANSITION) {
-
       const int range_length = range.end - range.start;
       for (int i = 0; i <= range_length; i++) {
         const int frame = range.start + i;
-        BKE_sound_set_scene_sound_pitch_at_frame(
-            strip->scene_sound, frame + sound_offset, range.speed_table[i], true);
+        if (correct_pitch) {
+          BKE_sound_set_scene_sound_time_stretch_at_frame(
+              sound_handle, frame - strip->start, 1.0 / range.speed_table[i], true);
+        }
+        else {
+          BKE_sound_set_scene_sound_pitch_at_frame(
+              strip->scene_sound, frame + sound_offset, range.speed_table[i], true);
+        }
       }
     }
     else {
-      BKE_sound_set_scene_sound_pitch_constant_range(
-          strip->scene_sound, range.start + sound_offset, range.end + sound_offset, range.speed);
+      if (correct_pitch) {
+        BKE_sound_set_scene_sound_time_stretch_constant_range(
+            sound_handle, range.start - strip->start, range.end - strip->start, 1.0 / range.speed);
+      }
+      else {
+        BKE_sound_set_scene_sound_pitch_constant_range(
+            strip->scene_sound, range.start + sound_offset, range.end + sound_offset, range.speed);
+      }
     }
   }
+
+  if (correct_pitch) {
+    BKE_sound_update_sequence_handle(strip->scene_sound, sound_handle);
+  }
 }
 
 bool retiming_selection_clear(const Editing *ed)