BLI_array_store: support run-length encoding / decoding
Add RLE encoding/decoding functions for byte arrays, used to pre-process arrays with large spans of (mostly) uniform values before storing them in a BArrayState. Part of a fix for #136737.
@@ -96,3 +96,32 @@ void *BLI_array_store_state_data_get_alloc(const BArrayState *state, size_t *r_d
 * \note Only for tests.
 */
bool BLI_array_store_is_valid(BArrayStore *bs);

/* `array_store_rle.cc` */

/**
 * Return a run-length encoded copy of `data_dec`.
 *
 * \param data_dec: The data to encode.
 * \param data_dec_len: The size of the data to encode.
 * \param data_enc_extra_size: Extra memory to allocate at the beginning of the returned array.
 * - This doesn't impact the value of `r_data_enc_len`.
 * - This must be skipped when decoding.
 * \param r_data_enc_len: The size of the resulting RLE encoded data.
 */
uint8_t *BLI_array_store_rle_encode(const uint8_t *data_dec,
                                    size_t data_dec_len,
                                    size_t data_enc_extra_size,
                                    size_t *r_data_enc_len);

/**
 * Decode a run-length encoded array, writing the result into `data_dec_v`.
 *
 * \param data_enc: The data to decode (as returned by #BLI_array_store_rle_encode).
 * \param data_enc_len: The size of `data_enc`.
 * \param data_dec_v: The destination the decoded data is written to.
 * \param data_dec_len: The size of the destination (as passed to #BLI_array_store_rle_encode).
 */
void BLI_array_store_rle_decode(const uint8_t *data_enc,
                                const size_t data_enc_len,
                                void *data_dec_v,
                                const size_t data_dec_len);
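
/* Example round-trip (an illustrative sketch, not part of this header).
 * It shows how `data_enc_extra_size` reserves leading bytes which the caller
 * must skip when decoding; `payload` & `payload_len` are hypothetical names.
 *
 *   size_t enc_len;
 *   uint8_t *enc = BLI_array_store_rle_encode(payload, payload_len, sizeof(size_t), &enc_len);
 *   // `enc_len` excludes the extra leading bytes, so skip them explicitly.
 *   uint8_t *copy = MEM_malloc_arrayN<uint8_t>(payload_len, __func__);
 *   BLI_array_store_rle_decode(enc + sizeof(size_t), enc_len, copy, payload_len);
 */
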
@@ -40,6 +40,7 @@ set(SRC
  intern/BLI_subprocess.cc
  intern/BLI_timer.cc
  intern/array_store.cc
  intern/array_store_rle.cc
  intern/array_store_utils.cc
  intern/array_utils.cc
  intern/array_utils_c.cc

source/blender/blenlib/intern/array_store_rle.cc (new file, 410 lines)
@@ -0,0 +1,410 @@
/* SPDX-FileCopyrightText: 2025 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup bli
 *
 * \brief Run-length encoding for arrays.
 *
 * The intended use is to pre-process arrays before storing in #BArrayStore.
 * This should be used in cases where arrays are likely to contain large spans of contiguous data
 * (which doesn't de-duplicate so well).
 *
 * Intended for byte arrays as there is no special logic to handle alignment.
 * Note that this could be supported and would be useful to de-duplicate
 * repeating patterns of non-byte data.
 *
 * Notes:
 * - For random data, the size overhead is only `sizeof(size_t[4])` (header & footer).
 *
 * - The main down-side in the case of random data is detecting there are no spans to RLE encode,
 *   and creating the "encoded" copy.
 *
 * - For an array containing a single value the resulting size
 *   will be `sizeof(size_t[3]) + sizeof(uint8_t)`.
 *
 * - This is not intended to be used for compression, it would be possible
 *   to use less memory by packing the size of short spans into fewer bits.
 *   This isn't done as it requires more computation when encoding.
 *
 * - This RLE implementation is a balance between working well for random bytes
 *   as well as arrays containing large contiguous spans.
 *
 *   There is *some* bias towards performing well with arrays containing contiguous spans,
 *   mainly because the benefits are greater there, and RLE encoding is typically chosen
 *   precisely because the data is likely to contain such spans.
 *   Having said this - encoding random bytes must not be *slow* either.
 */

#include <cstdlib>
#include <cstring>

#include "MEM_guardedalloc.h"

#include "BLI_assert.h"
#include "BLI_utildefines.h"

#include "BLI_array_store.h" /* Own include. */

#include "BLI_strict_flags.h" /* IWYU pragma: keep. Keep last. */

/* -------------------------------------------------------------------- */
/** \name Internal Utilities
 * \{ */

/**
 * Use a faster method of scanning for a change by stepping over larger values.
 *
 * NOTE(@ideasman42): In practice this gives ~3.5x overall speedup when encoding large arrays.
 * For random data the performance is worse, around 5% slower.
 */
#define USE_FIND_FASTPATH

static size_t find_byte_not_equal_to(const uint8_t *data,
                                     size_t offset,
                                     const size_t size,
                                     const uint8_t value)
{
  BLI_assert(offset <= size);

#ifdef USE_FIND_FASTPATH
  using fast_int = uintptr_t;

  /* In the case of random data, early exit without entering more involved steps. */
  constexpr size_t min_size_for_fast_path = sizeof(size_t[2]) + sizeof(fast_int[2]);
  if (LIKELY(size - offset > min_size_for_fast_path)) {
    /* Scan forward with a fixed size to check if an early exit
     * is needed (this may exit on the first few bytes). */
    const uint8_t *p = data + offset;
    const uint8_t *p_end = p + sizeof(size_t[2]);
    do {
      if (LIKELY(*p != value)) {
        return size_t(p - data);
      }
      p++;
    } while (p < p_end);
    /* `offset` is no longer valid and needs to be updated from `p` before use. */

    /* Scan forward at least `sizeof(size_t)` bytes,
     * aligned to the next `sizeof(fast_int)` aligned boundary. */
    p_end = reinterpret_cast<const uint8_t *>(
        ((uintptr_t(p) + sizeof(size_t) + sizeof(fast_int)) & ~(sizeof(fast_int) - 1)));
    do {
      if (LIKELY(*p != value)) {
        return size_t(p - data);
      }
      p++;
    } while (p < p_end);

    /* Scan forward the `fast_int` aligned chunks (the fast path).
     * This block is responsible for scanning over large spans of contiguous bytes. */

    /* There are at least `sizeof(size_t[2])` bytes which are all equal.
     * Use `fast_int` aligned reads for a faster search. */
    const fast_int *p_fast = reinterpret_cast<const fast_int *>(p_end);
    const fast_int *p_fast_end = reinterpret_cast<const fast_int *>(data +
                                                                    (size - sizeof(fast_int)));
    fast_int value_fast;
    memset(&value_fast, value, sizeof(value_fast));
    do {
      /* Use unlikely given many of the previous bytes match. */
      if (UNLIKELY(*p_fast != value_fast)) {
        break;
      }
      p_fast++;
    } while (p_fast < p_fast_end);
    offset = size_t(reinterpret_cast<const uint8_t *>(p_fast) - data);
    /* Perform byte level check with any trailing data. */
  }
#endif /* USE_FIND_FASTPATH */

  while ((offset < size) && (value == data[offset])) {
    offset += 1;
  }
  return offset;
}
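
/* For reference: with `USE_FIND_FASTPATH` disabled, the function reduces to the trailing
 * byte loop alone. The fast path only changes how quickly long uniform spans are skipped,
 * never the result (a sketch of the equivalent naive form):
 *
 *   while ((offset < size) && (value == data[offset])) {
 *     offset += 1;
 *   }
 *   return offset;
 */
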
/** \} */

/* -------------------------------------------------------------------- */
/** \name Private API
 * \{ */

struct RLE_Head {
  /**
   * - When zero, this struct is interpreted as a #RLE_Literal.
   * - When non-zero, this struct is interpreted as a #RLE_Span.
   *   The `value` is a `uint8_t` (to reduce the size of the struct).
   */
  size_t span_size;
};

struct RLE_Literal {
  uint8_t _span_size_pad[sizeof(size_t)];
  size_t value;
};

struct RLE_Span {
  uint8_t _span_size_pad[sizeof(size_t)];
  uint8_t value;
};
BLI_STATIC_ASSERT(sizeof(RLE_Span) == sizeof(size_t) + sizeof(uint8_t), "");

struct RLE_Elem {
  union {
    RLE_Head head;
    RLE_Span span;
    RLE_Literal literal;
  };
};
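
/* Informative sketch of the encoded stream, derived from the structs above and the
 * encode/decode logic below (records are written unaligned, back to back):
 *
 *   span record:    [size_t span_size != 0][uint8_t value]
 *   literal record: [size_t 0][size_t length][`length` raw bytes]
 *   terminator:     [size_t 0][size_t 0]
 */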

struct RLE_ElemChunk {
  RLE_ElemChunk *next;
  size_t links_num;
  /** Use 4KB chunks for efficient small allocations. */
  RLE_Elem links[(4096 / sizeof(RLE_Elem)) -
                 (sizeof(RLE_ElemChunk *) + sizeof(size_t) + MEM_SIZE_OVERHEAD)];
};
BLI_STATIC_ASSERT(sizeof(RLE_ElemChunk) <= 4096 - MEM_SIZE_OVERHEAD, "");

struct RLE_ElemChunkIter {
  RLE_ElemChunk *iter;
  size_t link_curr;
};

static void rle_link_chunk_iter_new(RLE_ElemChunk *links_block, RLE_ElemChunkIter *link_block_iter)
{
  link_block_iter->iter = links_block;
  link_block_iter->link_curr = 0;
}

static RLE_Elem *rle_link_chunk_iter_step(RLE_ElemChunkIter *link_block_iter)
{
  RLE_ElemChunk *link_block = link_block_iter->iter;
  if (link_block_iter->link_curr < link_block->links_num) {
    return &link_block->links[link_block_iter->link_curr++];
  }
  if (link_block->next) {
    link_block = link_block_iter->iter = link_block->next;
    link_block_iter->link_curr = 1;
    return &link_block->links[0];
  }
  return nullptr;
}

static RLE_ElemChunk *rle_link_chunk_new()
{
  RLE_ElemChunk *link_block = MEM_mallocN<RLE_ElemChunk>(__func__);
  link_block->next = nullptr;
  link_block->links_num = 0;
  return link_block;
}

static void rle_link_chunk_free_all(RLE_ElemChunk *link_block)
{
  while (RLE_ElemChunk *link_iter = link_block) {
    link_block = link_iter->next;
    MEM_freeN(link_iter);
  }
}

static RLE_Elem *rle_link_chunk_elem_new(RLE_ElemChunk **link_block_p)
{
  RLE_ElemChunk *link_block = *link_block_p;
  if (UNLIKELY(link_block->links_num == ARRAY_SIZE(link_block->links))) {
    RLE_ElemChunk *link_block_next = rle_link_chunk_new();
    link_block->next = link_block_next;
    link_block = link_block_next;
    *link_block_p = link_block_next;
  }
  return &link_block->links[link_block->links_num++];
}
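
/* Typical use of the chunk allocator above (a sketch mirroring the encode function below):
 *
 *   RLE_ElemChunk *chunks = rle_link_chunk_new();
 *   RLE_ElemChunk *chunks_first = chunks;
 *   RLE_Elem *e = rle_link_chunk_elem_new(&chunks); // Repeat per element.
 *
 *   RLE_ElemChunkIter iter;
 *   rle_link_chunk_iter_new(chunks_first, &iter);
 *   while (RLE_Elem *e_iter = rle_link_chunk_iter_step(&iter)) { ... }
 *
 *   rle_link_chunk_free_all(chunks_first);
 */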

/** \} */

/* -------------------------------------------------------------------- */
/** \name Public API
 * \{ */
uint8_t *BLI_array_store_rle_encode(const uint8_t *data_dec,
|
||||
const size_t data_dec_len,
|
||||
const size_t data_enc_extra_size,
|
||||
size_t *r_data_enc_len)
|
||||
{
|
||||
size_t data_enc_alloc_size = data_enc_extra_size +
|
||||
sizeof(RLE_Literal); /* A single null terminator. */
|
||||
|
||||
/* Notes on the threshold for choosing when to include literal data or RLE encode.
|
||||
* From testing a ~4 million array of booleans.
|
||||
*
|
||||
* Regarding space efficiency:
|
||||
*
|
||||
* - For data with fewer changes: `sizeof(RLE_Literal)` (16 on a 64bit system) is optimal.
|
||||
* The improvement varies, between 5-20%.
|
||||
* - For random data: `sizeof(RLE_Literal) + sizeof(size_t)` (24 on a 64bit system) is optimal.
|
||||
* The improvement is only ~5% though.
|
||||
*
|
||||
* The time difference between each is roughly the same.
|
||||
*/
|
||||
constexpr size_t rle_skip_threshold = sizeof(RLE_Literal);
|
||||
|
||||
RLE_ElemChunk *link_blocks = rle_link_chunk_new();
|
||||
RLE_ElemChunk *link_blocks_first = link_blocks;
|
||||
|
||||
/* Re-use results from scanning ahead (as needed). */
|
||||
for (size_t ofs_dec = 0, span_skip_next = 1; ofs_dec < data_dec_len;) {
|
||||
/* Scan ahead to detect the size of the non-RLE span. */
|
||||
size_t ofs_dec_next = ofs_dec + span_skip_next;
|
||||
span_skip_next = 1;
|
||||
|
||||
/* Detect and use the `span` if possible. */
|
||||
uint8_t value_start = data_dec[ofs_dec];
|
||||
ofs_dec_next = find_byte_not_equal_to(data_dec, ofs_dec_next, data_dec_len, value_start);
|
||||
|
||||
RLE_Elem *e = rle_link_chunk_elem_new(&link_blocks);
|
||||
const size_t span = ofs_dec_next - ofs_dec;
|
||||
if (span >= rle_skip_threshold) {
|
||||
/* Catch off by one errors. */
|
||||
BLI_assert(data_dec[ofs_dec] == data_dec[(ofs_dec + span) - 1]);
|
||||
BLI_assert((ofs_dec + span == data_dec_len) ||
|
||||
(data_dec[ofs_dec] != data_dec[(ofs_dec + span)]));
|
||||
e->head.span_size = span;
|
||||
e->span.value = value_start;
|
||||
data_enc_alloc_size += sizeof(RLE_Span);
|
||||
}
|
||||
else {
|
||||
/* A large enough span was not found,
|
||||
* scan ahead to detect the size of the non-RLE span. */
|
||||
|
||||
/* Check the offset isn't at the very end of the array. */
|
||||
size_t ofs_dec_test = ofs_dec_next + 1;
|
||||
if (LIKELY(ofs_dec_test < data_dec_len)) {
|
||||
/* The first value that changed, start searching here. */
|
||||
size_t ofs_dec_test_start = ofs_dec_next;
|
||||
value_start = data_dec[ofs_dec_test_start];
|
||||
while (true) {
|
||||
if (value_start == data_dec[ofs_dec_test]) {
|
||||
ofs_dec_test += 1;
|
||||
const size_t span_test = ofs_dec_test - ofs_dec_test_start;
|
||||
BLI_assert(span_test <= rle_skip_threshold);
|
||||
if (span_test == rle_skip_threshold) {
|
||||
/* Write the span of non-RLE data,
|
||||
* then start scanning the magnitude of the RLE span at the start of the loop. */
|
||||
span_skip_next = span_test;
|
||||
ofs_dec_next = ofs_dec_test_start;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
BLI_assert(ofs_dec_test - ofs_dec_test_start < rle_skip_threshold);
|
||||
value_start = data_dec[ofs_dec_test];
|
||||
ofs_dec_test_start = ofs_dec_test;
|
||||
ofs_dec_test += 1;
|
||||
}
|
||||
|
||||
if (UNLIKELY(ofs_dec_test == data_dec_len)) {
|
||||
ofs_dec_next = data_dec_len;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
ofs_dec_next = data_dec_len;
|
||||
}
|
||||
|
||||
/* Interleave the #RLE_Literal. */
|
||||
const size_t non_rle_span = ofs_dec_next - ofs_dec;
|
||||
e->head.span_size = 0;
|
||||
e->literal.value = non_rle_span;
|
||||
data_enc_alloc_size += sizeof(RLE_Literal) + non_rle_span;
|
||||
}
|
||||
|
||||
ofs_dec = ofs_dec_next;
|
||||
}
|
||||
|
||||
/* Encode RLE and literal data into this flat buffer. */
|
||||
uint8_t *data_enc = MEM_malloc_arrayN<uint8_t>(data_enc_alloc_size, __func__);
|
||||
data_enc += data_enc_extra_size;
|
||||
|
||||
size_t ofs_enc = 0;
|
||||
size_t ofs_dec = 0;
|
||||
|
||||
RLE_ElemChunkIter link_block_iter;
|
||||
rle_link_chunk_iter_new(link_blocks_first, &link_block_iter);
|
||||
while (RLE_Elem *e = rle_link_chunk_iter_step(&link_block_iter)) {
|
||||
BLI_assert(ofs_dec <= data_dec_len);
|
||||
|
||||
if (e->head.span_size) {
|
||||
memcpy(data_enc + ofs_enc, &e->span, sizeof(RLE_Span));
|
||||
ofs_enc += sizeof(RLE_Span);
|
||||
ofs_dec += e->head.span_size;
|
||||
}
|
||||
else {
|
||||
memcpy(data_enc + ofs_enc, &e->literal, sizeof(RLE_Literal));
|
||||
ofs_enc += sizeof(RLE_Literal);
|
||||
BLI_assert(e->literal.value > 0);
|
||||
const size_t non_rle_span = e->literal.value;
|
||||
memcpy(data_enc + ofs_enc, data_dec + ofs_dec, non_rle_span);
|
||||
ofs_enc += non_rle_span;
|
||||
ofs_dec += non_rle_span;
|
||||
}
|
||||
}
|
||||
rle_link_chunk_free_all(link_blocks_first);
|
||||
BLI_assert(data_enc_extra_size + ofs_enc + sizeof(RLE_Literal) == data_enc_alloc_size);
|
||||
BLI_assert(ofs_dec == data_dec_len);
|
||||
|
||||
/* Set the `RLE_Literal` span & value to 0 to terminate. */
|
||||
memset(data_enc + ofs_enc, 0x0, sizeof(RLE_Literal));
|
||||
|
||||
*r_data_enc_len = data_enc_alloc_size - data_enc_extra_size;
|
||||
|
||||
data_enc -= data_enc_extra_size;
|
||||
return data_enc;
|
||||
}
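
/* Worked size examples on a 64-bit system (following the notes at the top of this file),
 * assuming the input contains no other runs reaching `rle_skip_threshold`:
 *
 * - 1 MiB of a single repeated byte encodes to one span record plus the terminator:
 *   (8 + 1) + 16 = 25 bytes, i.e. `sizeof(size_t[3]) + sizeof(uint8_t)`.
 * - 1 MiB of random bytes encodes to one literal record plus the terminator:
 *   16 + 1 MiB + 16, i.e. a fixed `sizeof(size_t[4])` overhead.
 */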

void BLI_array_store_rle_decode(const uint8_t *data_enc,
                                const size_t data_enc_len,
                                void *data_dec_v,
                                const size_t data_dec_len)
{
  /* NOTE: `data_enc_len` & `data_dec_len` could be omitted.
   * They're just to ensure data isn't corrupt. */
  uint8_t *data_dec = reinterpret_cast<uint8_t *>(data_dec_v);
  size_t ofs_enc = 0;
  size_t ofs_dec = 0;

  while (true) {
    /* Copy as this may not be aligned. */
    RLE_Head e;
    memcpy(&e, data_enc + ofs_enc, sizeof(RLE_Head));
    ofs_enc += sizeof(RLE_Head);
    if (e.span_size != 0) {
      /* Read #RLE_Span::value directly from memory. */
      const uint8_t value = *reinterpret_cast<const uint8_t *>(data_enc + ofs_enc);
      memset(data_dec + ofs_dec, int(value), e.span_size);
      ofs_enc += sizeof(uint8_t);
      ofs_dec += e.span_size;
    }
    else {
      /* Read #RLE_Literal::value directly from memory. */
      size_t non_rle_span;
      memcpy(&non_rle_span, data_enc + ofs_enc, sizeof(size_t));
      ofs_enc += sizeof(size_t);
      if (non_rle_span) {
        memcpy(data_dec + ofs_dec, data_enc + ofs_enc, non_rle_span);
        ofs_enc += non_rle_span;
        ofs_dec += non_rle_span;
      }
      else {
        /* Both are zero - an end-of-buffer signal. */
        break;
      }
    }
  }
  BLI_assert(ofs_enc == data_enc_len);
  BLI_assert(ofs_dec == data_dec_len);
  UNUSED_VARS_NDEBUG(data_enc_len, data_dec_len);
}

/** \} */
@@ -18,6 +18,13 @@
/* Print memory savings. */
// #define DEBUG_PRINT

/* Print time. */
// #define DEBUG_TIME
#ifdef DEBUG_TIME
#  include "BLI_time.h"
#  include "BLI_time_utildefines.h"
#endif

/* -------------------------------------------------------------------- */
/* Helper functions */
@@ -789,10 +796,238 @@ TEST(array_store, TestChunk_Rand31_Stride11_Chunk21)
  random_chunk_mutate_helper(31, 100, 11, 21, 7117);
}

/* -------------------------------------------------------------------- */
/** \name RLE Encode/Decode Utilities
 * \{ */

static bool rle_encode_decode_test(const uint8_t *data_dec,
                                   size_t data_dec_len,
                                   size_t *r_data_enc_len)
{
  size_t data_enc_len;
  uint8_t *data_enc;

#ifdef DEBUG_TIME
  TIMEIT_START(encode);
#endif
  data_enc = BLI_array_store_rle_encode(data_dec, data_dec_len, 0, &data_enc_len);
#ifdef DEBUG_TIME
  TIMEIT_END(encode);
#endif

  uint8_t *data_dec_copy = MEM_malloc_arrayN<uint8_t>(data_dec_len, __func__);

#ifdef DEBUG_TIME
  TIMEIT_START(decode);
#endif
  BLI_array_store_rle_decode(data_enc, data_enc_len, data_dec_copy, data_dec_len);
#ifdef DEBUG_TIME
  TIMEIT_END(decode);
#endif

  MEM_freeN(data_enc);
  const bool eq = memcmp(data_dec, data_dec_copy, data_dec_len) == 0;
  MEM_freeN(data_dec_copy);
  if (r_data_enc_len) {
    *r_data_enc_len = data_enc_len;
  }
  return eq;
}

/**
 * Test that a span of empty data gets RLE encoded.
 */
static void array_store_test_random_span_rle_encode(const size_t data_size,
                                                    const size_t span_size,
                                                    const int permutations)
{
  BLI_assert(data_size > span_size);

  RNG *rng = BLI_rng_new(1);
  uint8_t *data = MEM_malloc_arrayN<uint8_t>(data_size, __func__);
  uint8_t *data_pattern = MEM_malloc_arrayN<uint8_t>(data_size, __func__);

  for (int i = 0; i < data_size; i++) {
    data_pattern[i] = i % 2;
  }

  /* Get the size without any RLE. */
  const size_t data_enc_no_rle_len = [&data_pattern, &data_size]() -> size_t {
    size_t data_enc_len;
    rle_encode_decode_test(data_pattern, data_size, &data_enc_len);
    return data_enc_len;
  }();

  for (int mutation = 0; mutation < permutations; mutation++) {
    memcpy(data, data_pattern, data_size);

    /* The first two mutations are always end-points. */
    int index;
    if (mutation == 0) {
      index = 0;
    }
    else if (mutation == 1) {
      index = int(data_size - span_size);
    }
    else {
      /* Place the span in a random location. */
      index = BLI_rng_get_int(rng) % (data_size - span_size);
    }

    memset(data + index, 0, span_size);

    size_t data_enc_len;
    rle_encode_decode_test(data, data_size, &data_enc_len);

    /* Ensure the RLE encoded version has at least the memory reduction of the span. */
    const size_t data_enc_len_expected_max = (data_enc_no_rle_len - span_size) +
                                             (sizeof(size_t[2]) * 2);
    EXPECT_LE(data_enc_len, data_enc_len_expected_max);
  }
  MEM_freeN(data);
  MEM_freeN(data_pattern);

  BLI_rng_free(rng);
}

static void array_store_test_random_data_rle_encode(const size_t data_size,
                                                    const size_t data_ratio_size,
                                                    const int permutations)
{
  RNG *rng = BLI_rng_new(1);
  uint8_t *data = MEM_malloc_arrayN<uint8_t>(data_size, __func__);

  for (int mutation = 0; mutation < permutations; mutation++) {
    memset(data, 1, data_ratio_size);
    memset(data + data_ratio_size, 0, data_size - data_ratio_size);

    BLI_rng_shuffle_array(rng, data, 1, data_size);

    size_t data_enc_len;
    EXPECT_TRUE(rle_encode_decode_test(data, data_size, &data_enc_len));
  }

  MEM_freeN(data);
  BLI_rng_free(rng);
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name RLE Encode/Decode Tests
 * \{ */

TEST(array_store, RLE_Simple)
{
  {
    const uint8_t data[] = {0};
    EXPECT_TRUE(rle_encode_decode_test(data, 0, nullptr));
  }
  {
    const uint8_t data[] = {0};
    EXPECT_TRUE(rle_encode_decode_test(data, sizeof(data), nullptr));
  }
  {
    const uint8_t data[] = {1};
    EXPECT_TRUE(rle_encode_decode_test(data, sizeof(data), nullptr));
  }
}

TEST(array_store, RLE_Uniform)
{
  const uint8_t data_uniform[64] = {0};
  uint8_t data_pattern[64] = {0};
  for (int i = 0; i < sizeof(data_pattern); i += 2) {
    data_pattern[i] = 1;
  }

  size_t data_uniform_enc_len = 0;
  size_t data_pattern_enc_len = 0;

  EXPECT_TRUE(rle_encode_decode_test(data_uniform, sizeof(data_uniform), &data_uniform_enc_len));
  EXPECT_TRUE(rle_encode_decode_test(data_pattern, sizeof(data_pattern), &data_pattern_enc_len));

  /* This depends on implementation details of header sizes.
   * Since there is no intention to change these, allow this.
   * They can always be updated as needed. */
  EXPECT_EQ(data_uniform_enc_len, sizeof(size_t) + sizeof(uint8_t) + sizeof(size_t[2]));
  EXPECT_EQ(data_pattern_enc_len, sizeof(data_uniform) + sizeof(size_t[4]));
}
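
/* A note on the expected sizes above (a sketch of the arithmetic, on a 64-bit system):
 * `data_uniform` encodes to a single span record plus the terminator (8 + 1 + 16 = 25),
 * while `data_pattern` never reaches the span threshold and is stored as one literal
 * record: header + 64 raw bytes + terminator (16 + 64 + 16 = 96). */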

TEST(array_store, RLE_RandomSpan)
{
  /* Enable if there is suspicion of rare edge cases causing problems. */
  const bool do_stress_test = false;

  const int permutations = do_stress_test ? 256 : 8;

  array_store_test_random_span_rle_encode(63, 31, permutations);
  array_store_test_random_span_rle_encode(63, 32, permutations);
  array_store_test_random_span_rle_encode(63, 33, permutations);

  array_store_test_random_span_rle_encode(64, 31, permutations);
  array_store_test_random_span_rle_encode(64, 32, permutations);
  array_store_test_random_span_rle_encode(64, 33, permutations);

  array_store_test_random_span_rle_encode(65, 31, permutations);
  array_store_test_random_span_rle_encode(65, 32, permutations);
  array_store_test_random_span_rle_encode(65, 33, permutations);

  if (do_stress_test) {
    const int data_size_max = 256;
    const int margin = sizeof(size_t[2]);
    for (int data_size = margin; data_size < data_size_max; data_size++) {
      for (int span_size = 1; span_size < data_size - margin; span_size++) {
        array_store_test_random_span_rle_encode(data_size, span_size, permutations);
      }
    }
  }
}

TEST(array_store, RLE_RandomBytes)
{
  /* Enable if there is suspicion of rare edge cases causing problems. */
  const bool do_stress_test = false;

  const int permutations = do_stress_test ? 256 : 8;

  array_store_test_random_data_rle_encode(128, 16, permutations);
  array_store_test_random_data_rle_encode(128, 32, permutations);
  array_store_test_random_data_rle_encode(128, 64, permutations);
  array_store_test_random_data_rle_encode(128, 128, permutations);

  array_store_test_random_data_rle_encode(131, 16, permutations);
  array_store_test_random_data_rle_encode(131, 32, permutations);
  array_store_test_random_data_rle_encode(131, 64, permutations);
  array_store_test_random_data_rle_encode(131, 128, permutations);

  if (do_stress_test) {
    const int data_size_max = 256;
    const int margin = sizeof(size_t[2]);
    for (int data_size = margin; data_size < data_size_max; data_size++) {
      for (int data_ratio_size = 1; data_ratio_size < data_size - 1; data_ratio_size++) {
        array_store_test_random_data_rle_encode(data_size, data_ratio_size, permutations);
      }
    }
  }

  if (do_stress_test) {
    /* Stress random data, handy for timing (32 million). */
    const size_t data_len_large = 32000000;
    array_store_test_random_data_rle_encode(data_len_large, data_len_large / 2, 4);
    array_store_test_random_data_rle_encode(data_len_large, 0, 4);
  }
}

/** \} */

#if 0

/* -------------------------------------------------------------------- */
/** \name Text File Tests (Disabled)
 *
 * Test From Files (disabled, keep for local tests).
 * \{ */

static void *file_read_binary_as_mem(const char *filepath, size_t pad_bytes, size_t *r_size)
{
@@ -870,3 +1105,5 @@ TEST(array_store, PlainTextFiles)
  BLI_array_store_destroy(bs);
}
#endif

/** \} */