From 4ec0cce30a07fcd481f9b886bbfdc90334335f53 Mon Sep 17 00:00:00 2001 From: Campbell Barton Date: Fri, 13 Sep 2024 11:18:10 +1000 Subject: [PATCH] WM: improve thumbnail scaling performance (debug builds) Saving files could take ~3-4 seconds on debug builds because of new imbuf scaling logic. Even though debug performance usually isn't much of a consideration, it gets in the way of development. Since thumbnails don't require the same accuracy as the sequencer or compositor, use a faster scaling method that uses a box-filter clamped to integer bounds & integer math. In practice the difference between the resulting thumbnails isn't noticeable in my tests. For debug build with ASAN this gives a ~25x speedup, for release builds it gives a ~1.4x speedup which is to be expected with a more approximate scaling method. --- source/blender/blenkernel/BKE_main.hh | 12 ++ source/blender/blenkernel/intern/main.cc | 22 +++ .../blender/windowmanager/intern/wm_files.cc | 140 ++++++++++++++++-- 3 files changed, 164 insertions(+), 10 deletions(-) diff --git a/source/blender/blenkernel/BKE_main.hh b/source/blender/blenkernel/BKE_main.hh index 675cdc649ac..c03da01f120 100644 --- a/source/blender/blenkernel/BKE_main.hh +++ b/source/blender/blenkernel/BKE_main.hh @@ -504,6 +504,18 @@ void BKE_main_library_weak_reference_remove_item( } \ ((void)0) +/** + * Generates a raw .blend file thumbnail data from a raw image buffer. + * + * \param bmain: If not NULL, also store generated data in this Main. + * \param rect: RGBA image buffer. + * \param size: The size of `rect`. + * \return The generated .blend file raw thumbnail data. + */ +BlendThumbnail *BKE_main_thumbnail_from_buffer(Main *bmain, + const uint8_t *rect, + const int size[2]); + /** * Generates a raw .blend file thumbnail data from given image. 
* diff --git a/source/blender/blenkernel/intern/main.cc b/source/blender/blenkernel/intern/main.cc index 5abeb8c68c9..b9ddfeec23f 100644 --- a/source/blender/blenkernel/intern/main.cc +++ b/source/blender/blenkernel/intern/main.cc @@ -757,6 +757,28 @@ void BKE_main_library_weak_reference_remove_item( MEM_SAFE_FREE(old_id->library_weak_reference); } +BlendThumbnail *BKE_main_thumbnail_from_buffer(Main *bmain, const uint8_t *rect, const int size[2]) +{ + BlendThumbnail *data = nullptr; + + if (bmain) { + MEM_SAFE_FREE(bmain->blen_thumb); + } + + if (rect) { + const size_t data_size = BLEN_THUMB_MEMSIZE(size[0], size[1]); + data = static_cast<BlendThumbnail *>(MEM_mallocN(data_size, __func__)); + data->width = size[0]; + data->height = size[1]; + memcpy(data->rect, rect, data_size - sizeof(*data)); + } + + if (bmain) { + bmain->blen_thumb = data; + } + return data; +} + BlendThumbnail *BKE_main_thumbnail_from_imbuf(Main *bmain, ImBuf *img) { BlendThumbnail *data = nullptr; diff --git a/source/blender/windowmanager/intern/wm_files.cc b/source/blender/windowmanager/intern/wm_files.cc index 5ac8e47a2e7..a3fbdbeb23d 100644 --- a/source/blender/windowmanager/intern/wm_files.cc +++ b/source/blender/windowmanager/intern/wm_files.cc @@ -151,6 +151,17 @@ static void wm_test_autorun_revert_action_exec(bContext *C); static CLG_LogRef LOG = {"wm.files"}; +/** + * Fast-path for down-scaling byte buffers. + * + * NOTE(@ideasman42) Support alternate logic for scaling byte buffers for + * thumbnails which doesn't use the higher quality box-filtered floating point math. + * This may be removed if similar performance can be achieved from other scale methods, + * especially in debug mode - which could cause file saving to be unreasonably slow + * (taking seconds just down-scaling the thumbnail).
+ */ +#define USE_THUMBNAIL_FAST_DOWNSCALE + /* -------------------------------------------------------------------- */ /** \name Misc Utility Functions * \{ */ @@ -1742,6 +1753,99 @@ static void wm_history_file_update() * * \{ */ +#ifdef USE_THUMBNAIL_FAST_DOWNSCALE +static uint8_t *blend_file_thumb_fast_downscale(const uint8_t *src_rect, + const int src_size[2], + const int dst_size[2]) +{ + /* NOTE: this is a faster alternative to #IMBScaleFilter::Box which is + * especially slow in debug builds, normally debug performance isn't a + * consideration however it's slow enough to get in the way of development. + * In release builds this gives ~1.4x speedup. */ + + /* Scaling using a box-filter where each box uses an integer-rounded region. + * Accept a slightly lower quality scale as this is only for thumbnails. + * In practice the result is visually indistinguishable. + * + * Technically the color accumulation *could* overflow (creating some invalid pixels), + * however this would require the source image to be larger than + * 65,535 pixels squared (when scaling down to 256x256). + * As the source input is a screenshot or a small camera render created for the thumbnail, + * this isn't a concern. */ + + BLI_assert(dst_size[0] <= src_size[0] && dst_size[1] <= src_size[1]); + uint8_t *dst_rect = static_cast<uint8_t *>( + MEM_mallocN(sizeof(uint8_t[4]) * dst_size[0] * dst_size[1], __func__)); + + /* A row, the width of the destination to accumulate pixel values into + * before writing into the image. */ + uint32_t *accum_row = static_cast<uint32_t *>( + MEM_callocN(sizeof(uint32_t) * dst_size[0] * 4, __func__)); + +# ifndef NDEBUG + /* Assert that samples are calculated correctly.
*/ + uint64_t sample_count_all = 0; +# endif + + const uint32_t src_size_x = src_size[0]; + const uint32_t src_size_y = src_size[1]; + + const uint32_t dst_size_x = dst_size[0]; + const uint32_t dst_size_y = dst_size[1]; + const uint8_t *src_px = src_rect; + + uint32_t src_y = 0; + for (uint32_t dst_y = 0; dst_y < dst_size_y; dst_y++) { + const uint32_t src_y_beg = src_y; + const uint32_t src_y_end = ((dst_y + 1) * src_size_y) / dst_size_y; + for (; src_y < src_y_end; src_y++) { + uint32_t *accum = accum_row; + uint32_t src_x = 0; + for (uint32_t dst_x = 0; dst_x < dst_size_x; dst_x++, accum += 4) { + const uint32_t src_x_end = ((dst_x + 1) * src_size_x) / dst_size_x; + for (; src_x < src_x_end; src_x++) { + accum[0] += uint32_t(src_px[0]); + accum[1] += uint32_t(src_px[1]); + accum[2] += uint32_t(src_px[2]); + accum[3] += uint32_t(src_px[3]); + src_px += 4; + } + BLI_assert(src_x == src_x_end); + } + BLI_assert(accum == accum_row + (4 * dst_size[0])); + } + + uint32_t *accum = accum_row; + uint8_t *dst_px = dst_rect + ((dst_y * dst_size_x) * 4); + uint32_t src_x_beg = 0; + const uint32_t span_y = src_y_end - src_y_beg; + for (uint32_t dst_x = 0; dst_x < dst_size_x; dst_x++) { + const uint32_t src_x_end = ((dst_x + 1) * src_size_x) / dst_size_x; + const uint32_t span_x = src_x_end - src_x_beg; + + const uint32_t sample_count = span_x * span_y; + dst_px[0] = uint8_t(accum[0] / sample_count); + dst_px[1] = uint8_t(accum[1] / sample_count); + dst_px[2] = uint8_t(accum[2] / sample_count); + dst_px[3] = uint8_t(accum[3] / sample_count); + accum[0] = accum[1] = accum[2] = accum[3] = 0; + accum += 4; + dst_px += 4; + + src_x_beg = src_x_end; +# ifndef NDEBUG + sample_count_all += sample_count; +# endif + } + } + BLI_assert(src_px == src_rect + (sizeof(uint8_t[4]) * src_size[0] * src_size[1])); + BLI_assert(sample_count_all == size_t(src_size[0]) * size_t(src_size[1])); + + MEM_freeN(accum_row); + return dst_rect; +} +#endif /* USE_THUMBNAIL_FAST_DOWNSCALE */ + static 
blender::int2 blend_file_thumb_clamp_size(const int size[2], const int limit) { blender::int2 result; @@ -1783,19 +1887,35 @@ static ImBuf *blend_file_thumb_from_screenshot(bContext *C, BlendThumbnail **r_t const blender::int2 thumb_size_2x = blend_file_thumb_clamp_size(win_size, BLEN_THUMB_SIZE * 2); const blender::int2 thumb_size = blend_file_thumb_clamp_size(win_size, BLEN_THUMB_SIZE); - ibuf = IMB_allocFromBufferOwn(buffer, nullptr, win_size[0], win_size[1], 24); - BLI_assert(ibuf != nullptr); /* Never expected to fail. */ +#ifdef USE_THUMBNAIL_FAST_DOWNSCALE + if ((thumb_size_2x[0] <= win_size[0]) && (thumb_size_2x[1] <= win_size[1])) { + uint8_t *rect_2x = blend_file_thumb_fast_downscale(buffer, win_size, thumb_size_2x); + uint8_t *rect = blend_file_thumb_fast_downscale(rect_2x, thumb_size_2x, thumb_size); - /* File-system thumbnail image can be 256x256. */ - IMB_scale(ibuf, thumb_size_2x.x, thumb_size_2x.y, IMBScaleFilter::Box, false); + MEM_freeN(buffer); + ibuf = IMB_allocFromBufferOwn(rect_2x, nullptr, thumb_size_2x.x, thumb_size_2x.y, 24); - /* Thumbnail inside blend should be 128x128. */ - ImBuf *thumb_ibuf = IMB_dupImBuf(ibuf); - IMB_scale(thumb_ibuf, thumb_size.x, thumb_size.y, IMBScaleFilter::Box, false); + BlendThumbnail *thumb = BKE_main_thumbnail_from_buffer(nullptr, rect, thumb_size); + MEM_freeN(rect); + *r_thumb = thumb; + } + else +#endif /* USE_THUMBNAIL_FAST_DOWNSCALE */ + { + ibuf = IMB_allocFromBufferOwn(buffer, nullptr, win_size[0], win_size[1], 24); + BLI_assert(ibuf != nullptr); /* Never expected to fail. */ - BlendThumbnail *thumb = BKE_main_thumbnail_from_imbuf(nullptr, thumb_ibuf); - IMB_freeImBuf(thumb_ibuf); - *r_thumb = thumb; + /* File-system thumbnail image can be 256x256. */ + IMB_scale(ibuf, thumb_size_2x.x, thumb_size_2x.y, IMBScaleFilter::Box, false); + + /* Thumbnail inside blend should be 128x128. 
*/ + ImBuf *thumb_ibuf = IMB_dupImBuf(ibuf); + IMB_scale(thumb_ibuf, thumb_size.x, thumb_size.y, IMBScaleFilter::Box, false); + + BlendThumbnail *thumb = BKE_main_thumbnail_from_imbuf(nullptr, thumb_ibuf); + IMB_freeImBuf(thumb_ibuf); + *r_thumb = thumb; + } } if (ibuf) {