Add BLI_array_store copy-on-write API
author Campbell Barton <ideasman42@gmail.com>
Mon, 30 May 2016 05:25:36 +0000 (15:25 +1000)
committer Campbell Barton <ideasman42@gmail.com>
Mon, 30 May 2016 06:18:24 +0000 (16:18 +1000)
This supports in-memory de-duplication,
useful to avoid inefficient memory use when storing multiple, similar arrays.
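
A minimal usage sketch of the new API (hypothetical caller code, not part of this commit;
names such as array_store_example and verts_a are placeholders). It assumes the buffer
returned by BLI_array_store_state_data_get_alloc is allocated with MEM_mallocN, so it is
released with MEM_freeN:

  #include <stddef.h>

  #include "MEM_guardedalloc.h"
  #include "BLI_array_store.h"

  static void array_store_example(const float *verts_a, const float *verts_b, size_t verts_len)
  {
      /* stride: size of one element, chunk_count: number of elements per chunk */
      BArrayStore *bs = BLI_array_store_create((unsigned int)sizeof(float), 64);

      /* the first state has no reference */
      BArrayState *state_a = BLI_array_store_state_add(
              bs, verts_a, verts_len * sizeof(float), NULL);
      /* a similar array added with 'state_a' as reference re-uses its unmodified chunks */
      BArrayState *state_b = BLI_array_store_state_add(
              bs, verts_b, verts_len * sizeof(float), state_a);

      /* expand a state back into a contiguous array */
      size_t data_len;
      float *verts_b_copy = BLI_array_store_state_data_get_alloc(state_b, &data_len);
      /* ... use verts_b_copy ... */
      MEM_freeN(verts_b_copy);

      BLI_array_store_state_remove(bs, state_a);
      BLI_array_store_state_remove(bs, state_b);
      BLI_array_store_destroy(bs);
  }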

source/blender/blenlib/BLI_array_store.h [new file with mode: 0644]
source/blender/blenlib/CMakeLists.txt
source/blender/blenlib/intern/array_store.c [new file with mode: 0644]

diff --git a/source/blender/blenlib/BLI_array_store.h b/source/blender/blenlib/BLI_array_store.h
new file mode 100644 (file)
index 0000000..f4cbc07
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+#ifndef __BLI_ARRAY_STORE_H__
+#define __BLI_ARRAY_STORE_H__
+
+/** \file BLI_array_store.h
+ *  \ingroup bli
+ *  \brief Efficient in-memory storage of multiple similar arrays.
+ */
+
+typedef struct BArrayStore BArrayStore;
+typedef struct BArrayState BArrayState;
+
+BArrayStore *BLI_array_store_create(
+        unsigned int stride, unsigned int chunk_count);
+void BLI_array_store_destroy(
+        BArrayStore *bs);
+void BLI_array_store_clear(
+        BArrayStore *bs);
+
+/* Find the memory used by all states (expanded & compacted). */
+size_t BLI_array_store_calc_size_expanded_get(
+        const BArrayStore *bs);
+size_t BLI_array_store_calc_size_compacted_get(
+        const BArrayStore *bs);
+
+BArrayState *BLI_array_store_state_add(
+        BArrayStore *bs,
+        const void *data, const size_t data_len,
+        const BArrayState *state_reference);
+void BLI_array_store_state_remove(
+        BArrayStore *bs,
+        BArrayState *state);
+
+size_t BLI_array_store_state_size_get(
+        BArrayState *state);
+void BLI_array_store_state_data_get(
+        BArrayState *state,
+        void *data);
+void *BLI_array_store_state_data_get_alloc(
+        BArrayState *state,
+        size_t *r_data_len);
+
+/* only for tests */
+bool BLI_array_store_is_valid(
+        BArrayStore *bs);
+
+#endif  /* __BLI_ARRAY_STORE_H__ */
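
As a quick illustration of the statistics functions above, a small hypothetical helper
(not part of this commit) that reports how much memory is saved by sharing chunks between
states:

  #include <stdio.h>

  #include "BLI_array_store.h"

  static void array_store_print_stats(const BArrayStore *bs)
  {
      const size_t size_expanded  = BLI_array_store_calc_size_expanded_get(bs);
      const size_t size_compacted = BLI_array_store_calc_size_compacted_get(bs);
      /* the compacted/expanded ratio shows how well de-duplication is working */
      printf("expanded: %zu bytes, compacted: %zu bytes (%.1f%%)\n",
             size_expanded, size_compacted,
             size_expanded ? 100.0 * (double)size_compacted / (double)size_expanded : 0.0);
  }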
diff --git a/source/blender/blenlib/CMakeLists.txt b/source/blender/blenlib/CMakeLists.txt
index 944ba60..42d9587 100644 (file)
@@ -52,6 +52,7 @@ set(SRC
        intern/BLI_memarena.c
        intern/BLI_mempool.c
        intern/DLRB_tree.c
+       intern/array_store.c
        intern/array_utils.c
        intern/astar.c
        intern/boxpack2d.c
@@ -120,6 +121,7 @@ set(SRC
        BLI_alloca.h
        BLI_args.h
        BLI_array.h
+       BLI_array_store.h
        BLI_array_utils.h
        BLI_astar.h
        BLI_bitmap.h
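
The implementation below splits each array into chunks of 'chunk_byte_size' bytes; when the
trailing remainder would fall below the minimum chunk size, one full chunk is folded into it
so no chunk ends up undersized. A rough standalone sketch of that splitting rule (hypothetical
helper, not part of this commit, mirroring bchunk_list_fill_from_array):

  #include <stddef.h>
  #include <stdio.h>

  static void chunk_split_sketch(size_t data_len, size_t chunk_byte_size, size_t chunk_byte_size_min)
  {
      size_t data_trim_len, data_last_chunk_len;
      if (data_len > chunk_byte_size) {
          data_last_chunk_len = data_len % chunk_byte_size;
          data_trim_len = data_len - data_last_chunk_len;
          if (data_last_chunk_len && (data_last_chunk_len < chunk_byte_size_min)) {
              /* fold one full chunk into the small remainder */
              data_trim_len -= chunk_byte_size;
              data_last_chunk_len += chunk_byte_size;
          }
      }
      else {
          data_trim_len = 0;
          data_last_chunk_len = data_len;
      }
      printf("%zu full chunk(s) of %zu bytes + a trailing chunk of %zu bytes\n",
             data_trim_len / chunk_byte_size, chunk_byte_size, data_last_chunk_len);
  }

  int main(void)
  {
      /* e.g. stride=4, chunk_count=32 gives chunk_byte_size=128, minimum 128/8=16 */
      chunk_split_sketch(1000, 128, 16);  /* 7 full chunks + a 104 byte tail */
      chunk_split_sketch(1030, 128, 16);  /* remainder 6 < 16, so 7 full chunks + a 134 byte tail */
      return 0;
  }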
diff --git a/source/blender/blenlib/intern/array_store.c b/source/blender/blenlib/intern/array_store.c
new file mode 100644 (file)
index 0000000..8cbabdd
--- /dev/null
@@ -0,0 +1,1731 @@
+/*
+ * ***** BEGIN GPL LICENSE BLOCK *****
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * ***** END GPL LICENSE BLOCK *****
+ */
+
+/** \file blender/blenlib/intern/array_store.c
+ *  \ingroup bli
+ *  \brief Array storage to minimize duplication.
+ *
+ * This is done by splitting arrays into chunks and using copy-on-write (COW)
+ * to de-duplicate chunks;
+ * from the user's perspective this is an implementation detail.
+ *
+ *
+ * Overview
+ * ========
+ *
+ *
+ * Data Structure
+ * --------------
+ *
+ * This diagram is an overview of the structure of a single array-store.
+ *
+ * \note The only two structures here which are referenced externally are:
+ *
+ * - BArrayStore: The whole array store.
+ * - BArrayState: Represents a single state (array) of data.
+ *   These can be added using a reference state; while this could be considered the previous or parent state,
+ *   no relationship is kept, so the caller is free to add any state from the same BArrayStore as a reference.
+ *
+ * <pre>
+ * <+> BArrayStore: root data-structure,
+ *  |  can store many 'states', which share memory.
+ *  |
+ *  |  This can store many arrays, however they must share the same 'stride'.
+ *  |  Arrays of different types will need to use a new BArrayStore.
+ *  |
+ *  +- <+> states (Collection of BArrayState's):
+ *  |   |  Each represents an array added by the user of this API,
+ *  |   |  and references a chunk_list (each state is a chunk_list user).
+ *  |   |  Note that the list order has no significance.
+ *  |   |
+ *  |   +- <+> chunk_list (BChunkList):
+ *  |       |  The chunks that make up this state.
+ *  |       |  Each state is a chunk_list user,
+ *  |       |  avoids duplicating lists when there is no change between states.
+ *  |       |
+ *  |       +- chunk_refs (List of BChunkRef): Each chunk_ref links to a BChunk.
+ *  |          Each reference is a chunk user,
+ *  |          avoids duplicating smaller chunks of memory found in multiple states.
+ *  |
+ *  +- info (BArrayInfo):
+ *  |  Sizes and offsets for this array-store.
+ *  |  Also caches some variables for reuse.
+ *  |
+ *  +- <+> memory (BArrayMemory):
+ *      |  Memory pools for storing BArrayStore data.
+ *      |
+ *      +- chunk_list (Pool of BChunkList):
+ *      |  All chunk_lists, (reference counted, used by BArrayState).
+ *      |
+ *      +- chunk_ref (Pool of BChunkRef):
+ *      |  All chunk_refs (link between BChunkList & BChunk).
+ *      |
+ *      +- chunks (Pool of BChunk):
+ *         All chunks, (reference counted, used by BChunkList).
+ *         These have their headers hashed for reuse so we can quickly check for duplicates.
+ * </pre>
+ *
+ *
+ * De-Duplication
+ * --------------
+ *
+ * When creating a new state, a previous state can be given as a reference,
+ * matching chunks from this state are re-used in the new state.
+ *
+ * First, matches at either end of the array are detected.
+ * For identical arrays this is all that's needed.
+ *
+ * De-duplication is performed on any remaining chunks, by hashing the first few bytes of the chunk
+ * (see: BCHUNK_HASH_TABLE_ACCUMULATE_STEPS).
+ *
+ * \note This is cached for reuse since the referenced data never changes.
+ *
+ * An array is created to store hash values at every 'stride',
+ * then stepped over to search for matching chunks.
+ *
+ * Once a match is found, there is a high chance the next chunks match too,
+ * so these are checked to avoid performing many hash-lookups.
+ * Otherwise new chunks are created.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_listbase.h"
+#include "BLI_mempool.h"
+
+#include "BLI_strict_flags.h"
+
+#include "BLI_array_store.h"  /* own include */
+
+/* only for BLI_array_store_is_valid */
+#include "BLI_ghash.h"
+
+/** \name Defines
+ *
+ * Some of the logic for merging is quite involved,
+ * so some parts of it can be disabled.
+ * \{ */
+
+/* Scan first chunks (happy path when beginning of the array matches).
+ * When the array is a perfect match, we can re-use the entire list.
+ *
+ * Note that disabling makes some tests fail that check for output-size.
+ */
+#define USE_FASTPATH_CHUNKS_FIRST
+
+/* Scan last chunks (happy path when end of the array matches).
+ * When the end of the array matches, we can quickly add these chunks.
+ * note that we will add contiguous matching chunks
+ * so this isn't as useful as USE_FASTPATH_CHUNKS_FIRST,
+ * however it avoids adding matching chunks into the lookup table,
+ * so creating the lookup table won't be as expensive.
+ */
+#ifdef USE_FASTPATH_CHUNKS_FIRST
+#  define USE_FASTPATH_CHUNKS_LAST
+#endif
+
+/* For arrays of matching length, test that *enough* of the chunks are aligned,
+ * and simply step over both arrays, using matching chunks.
+ * This avoids overhead of using a lookup table for cases when we can assume they're mostly aligned.
+ */
+#define USE_ALIGN_CHUNKS_TEST
+
+/* Accumulate hashes from right to left so we can create a hash for the chunk-start.
+ * This serves to increase uniqueness and will help when there are many values which are the same.
+ */
+#define USE_HASH_TABLE_ACCUMULATE
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+/* Number of times to propagate hashes back.
+ * Effectively a 'triangle-number',
+ * so 4 -> 7, 5 -> 10, 6 -> 15... etc.
+ */
+#  define BCHUNK_HASH_TABLE_ACCUMULATE_STEPS 4
+#else
+/* How many items to hash (multiplied by stride)
+ */
+#  define BCHUNK_HASH_LEN 4
+#endif
+
+/* Calculate the key once and reuse it
+ */
+#define USE_HASH_TABLE_KEY_CACHE
+#ifdef USE_HASH_TABLE_KEY_CACHE
+#  define HASH_TABLE_KEY_UNSET    ((uint64_t)-1)
+#  define HASH_TABLE_KEY_FALLBACK ((uint64_t)-2)
+#endif
+
+/* How much smaller the table is than the total number of steps.
+ */
+#define BCHUNK_HASH_TABLE_DIV 16
+
+/* Merge too small/large chunks:
+ *
+ * Using this means chunks below a threshold will be merged together.
+ * Even though short term this uses more memory,
+ * long term the overhead of maintaining many small chunks is reduced.
+ * This is defined by setting the minimum chunk size (as a fraction of the regular chunk size).
+ *
+ * Chunks may also become too large (when incrementally growing an array),
+ * this also enables chunk splitting.
+ */
+#define USE_MERGE_CHUNKS
+
+#ifdef USE_MERGE_CHUNKS
+/* Merge chunks smaller than: (chunk_size / BCHUNK_SIZE_MIN_DIV)
+ */
+#  define BCHUNK_SIZE_MIN_DIV 8
+
+/* Disallow chunks bigger than the regular chunk size scaled by this value.
+ * Note: must be at least 2!
+ * However, this code won't run in tests unless it's ~1.1 (ugh),
+ * so lower it only to check splitting works.
+ */
+#  define BCHUNK_SIZE_MAX_MUL 2
+#endif  /* USE_MERGE_CHUNKS */
+
+/* slow (keep disabled), but handy for debugging */
+// #define USE_VALIDATE_LIST_SIZE
+
+// #define USE_VALIDATE_LIST_DATA_PARTIAL
+
+// #define USE_PARANOID_CHECKS
+
+/** \} */
+
+
+/** \name Internal Structs
+ * \{ */
+
+typedef unsigned int  uint;
+typedef unsigned char ubyte;
+
+typedef uint64_t hash_key;
+
+
+typedef struct BArrayInfo {
+       size_t chunk_stride;
+       uint chunk_count;
+
+       /* pre-calculated */
+       size_t chunk_byte_size;
+       size_t chunk_byte_size_min;
+
+       size_t accum_read_ahead_bytes;
+#ifdef USE_HASH_TABLE_ACCUMULATE
+       size_t accum_steps;
+       size_t accum_read_ahead_len;
+#endif
+} BArrayInfo;
+
+typedef struct BArrayMemory {
+       BLI_mempool *chunk_list;  /* BChunkList */
+       BLI_mempool *chunk_ref;   /* BChunkRef */
+       BLI_mempool *chunk;       /* BChunk */
+} BArrayMemory;
+
+/**
+ * Main storage for all states
+ */
+typedef struct BArrayStore {
+       /* static */
+       BArrayInfo info;
+
+       /* memory storage */
+       BArrayMemory memory;
+
+       /**
+        * #BArrayState may be in any order (logic should never depend on state order).
+        */
+       ListBase states;
+} BArrayStore;
+
+/**
+ * A single instance of an array.
+ *
+ * This is how external APIs hold a reference to an in-memory state,
+ * although the struct is private.
+ *
+ * \note Currently each 'state' is allocated separately.
+ * While this could be moved to a memory pool,
+ * it makes it easier to trace invalid usage, so leave as-is for now.
+ */
+typedef struct BArrayState {
+       /** linked list in #BArrayStore.states */
+       struct BArrayState *next, *prev;
+
+       struct BChunkList *chunk_list;  /* BChunkList's */
+
+} BArrayState;
+
+typedef struct BChunkList {
+       ListBase chunk_refs;      /* BChunkRef's */
+       uint     chunk_refs_len;  /* BLI_listbase_count(chunks), store for reuse. */
+       size_t   total_size;      /* size of all chunks */
+
+       /** number of #BArrayState using this. */
+       int      users;
+} BChunkList;
+
+/* a chunk of an array */
+typedef struct BChunk {
+       const ubyte *data;
+       size_t       data_len;
+       /** number of #BChunkList using this. */
+       int          users;
+
+#ifdef USE_HASH_TABLE_KEY_CACHE
+       hash_key key;
+#endif
+} BChunk;
+
+/**
+ * Links to store #BChunk data in #BChunkList.chunks.
+ */
+typedef struct BChunkRef {
+       struct BChunkRef *next, *prev;
+       BChunk *link;
+} BChunkRef;
+
+/**
+ * Singly linked list used when putting chunks into a temporary table,
+ * used for lookups.
+ *
+ * Point to the #BChunkRef, not the #BChunk,
+ * to allow walking down the chunks in-order until a mismatch is found;
+ * this avoids having to do so many table lookups.
+ */
+typedef struct BTableRef {
+       struct BTableRef *next;
+       const BChunkRef *cref;
+} BTableRef;
+
+/** \} */
+
+
+static size_t bchunk_list_size(const BChunkList *chunk_list);
+
+
+/** \name Internal BChunk API
+ * \{ */
+
+static BChunk *bchunk_new(
+        BArrayMemory *bs_mem, const ubyte *data, const size_t data_len)
+{
+       BChunk *chunk = BLI_mempool_alloc(bs_mem->chunk);
+       chunk->data     = data;
+       chunk->data_len = data_len;
+       chunk->users = 0;
+#ifdef USE_HASH_TABLE_KEY_CACHE
+       chunk->key = HASH_TABLE_KEY_UNSET;
+#endif
+       return chunk;
+}
+
+static BChunk *bchunk_new_copydata(
+        BArrayMemory *bs_mem, const ubyte *data, const size_t data_len)
+{
+       ubyte *data_copy = MEM_mallocN(data_len, __func__);
+       memcpy(data_copy, data, data_len);
+       return bchunk_new(bs_mem, data_copy, data_len);
+}
+
+static void bchunk_decref(
+        BArrayMemory *bs_mem, BChunk *chunk)
+{
+       BLI_assert(chunk->users > 0);
+       if (chunk->users == 1) {
+               MEM_freeN((void *)chunk->data);
+               BLI_mempool_free(bs_mem->chunk, chunk);
+       }
+       else {
+               chunk->users -= 1;
+       }
+}
+
+static bool bchunk_data_compare(
+        const BChunk *chunk,
+        const ubyte *data_base, const size_t data_base_len,
+        const size_t offset)
+{
+       if (offset + (size_t)chunk->data_len <= data_base_len) {
+               return (memcmp(&data_base[offset], chunk->data, chunk->data_len) == 0);
+       }
+       else {
+               return false;
+       }
+}
+
+/** \} */
+
+
+/** \name Internal BChunkList API
+ * \{ */
+
+static BChunkList *bchunk_list_new(
+        BArrayMemory *bs_mem, size_t total_size)
+{
+       BChunkList *chunk_list = BLI_mempool_alloc(bs_mem->chunk_list);
+
+       BLI_listbase_clear(&chunk_list->chunk_refs);
+       chunk_list->chunk_refs_len = 0;
+       chunk_list->total_size = total_size;
+       chunk_list->users = 0;
+       return chunk_list;
+}
+
+static void bchunk_list_decref(
+        BArrayMemory *bs_mem, BChunkList *chunk_list)
+{
+       BLI_assert(chunk_list->users > 0);
+       if (chunk_list->users == 1) {
+               for (BChunkRef *cref = chunk_list->chunk_refs.first, *cref_next; cref; cref = cref_next) {
+                       cref_next = cref->next;
+                       bchunk_decref(bs_mem, cref->link);
+                       BLI_mempool_free(bs_mem->chunk_ref, cref);
+               }
+
+               BLI_mempool_free(bs_mem->chunk_list, chunk_list);
+       }
+       else {
+               chunk_list->users -= 1;
+       }
+}
+
+#ifdef USE_VALIDATE_LIST_SIZE
+#  ifndef NDEBUG
+#    define ASSERT_CHUNKLIST_SIZE(chunk_list, n) BLI_assert(bchunk_list_size(chunk_list) == n)
+#  endif
+#endif
+#ifndef ASSERT_CHUNKLIST_SIZE
+#  define ASSERT_CHUNKLIST_SIZE(chunk_list, n) (EXPR_NOP(chunk_list), EXPR_NOP(n))
+#endif
+
+
+#ifdef USE_VALIDATE_LIST_DATA_PARTIAL
+static size_t bchunk_list_data_check(
+        const BChunkList *chunk_list, const ubyte *data)
+{
+       size_t total_size = 0;
+       for (BChunkRef *cref = chunk_list->chunk_refs.first; cref; cref = cref->next) {
+               if (memcmp(&data[total_size], cref->link->data, cref->link->data_len) != 0) {
+                       return false;
+               }
+               total_size += cref->link->data_len;
+       }
+       return true;
+}
+#  define ASSERT_CHUNKLIST_DATA(chunk_list, data) BLI_assert(bchunk_list_data_check(chunk_list, data))
+#else
+#  define ASSERT_CHUNKLIST_DATA(chunk_list, data) (EXPR_NOP(chunk_list), EXPR_NOP(data))
+#endif
+
+
+#ifdef USE_MERGE_CHUNKS
+static void bchunk_list_ensure_min_size_last(
+        const BArrayInfo *info, BArrayMemory *bs_mem,
+        BChunkList *chunk_list)
+{
+       BChunkRef *cref = chunk_list->chunk_refs.last;
+       if (cref && cref->prev) {
+               /* both are decref'd after use (end of this block) */
+               BChunk *chunk_curr = cref->link;
+               BChunk *chunk_prev = cref->prev->link;
+
+               if (MIN2(chunk_prev->data_len, chunk_curr->data_len) < info->chunk_byte_size_min) {
+                       const size_t data_merge_len = chunk_prev->data_len + chunk_curr->data_len;
+                       /* we could pass, but no need */
+                       if (data_merge_len <= (info->chunk_byte_size * BCHUNK_SIZE_MAX_MUL)) {
+                               /* we have enough space to merge */
+
+                               /* remove last from linklist */
+                               BLI_assert(chunk_list->chunk_refs.last != chunk_list->chunk_refs.first);
+                               cref->prev->next = NULL;
+                               chunk_list->chunk_refs.last = cref->prev;
+                               chunk_list->chunk_refs_len -= 1;
+
+                               ubyte *data_merge   = MEM_mallocN(data_merge_len, __func__);
+                               memcpy(data_merge,                        chunk_prev->data, chunk_prev->data_len);
+                               memcpy(&data_merge[chunk_prev->data_len], chunk_curr->data, chunk_curr->data_len);
+
+                               cref->prev->link = bchunk_new(bs_mem, data_merge, data_merge_len);
+                               cref->prev->link->users += 1;
+
+                               BLI_mempool_free(bs_mem->chunk_ref, cref);
+                       }
+                       else {
+                               /* If we always merge small slices, we should _almost_ never end up having very large chunks.
+                                * Gradual expanding or contracting will cause this.
+                                *
+                                * If we do, the code below works (test by setting 'BCHUNK_SIZE_MAX_MUL = 1.2') */
+
+                               /* keep chunk on the left hand side a regular size */
+                               const size_t split = info->chunk_byte_size;
+
+                               /* merge and split */
+                               const size_t data_prev_len = split;
+                               const size_t data_curr_len = data_merge_len - split;
+                               ubyte *data_prev = MEM_mallocN(data_prev_len, __func__);
+                               ubyte *data_curr = MEM_mallocN(data_curr_len, __func__);
+
+                               if (data_prev_len <= chunk_prev->data_len) {
+                                       const size_t data_curr_shrink_len = chunk_prev->data_len - data_prev_len;
+
+                                       /* setup 'data_prev' */
+                                       memcpy(data_prev, chunk_prev->data, data_prev_len);
+
+                                       /* setup 'data_curr' */
+                                       memcpy(data_curr,                        &chunk_prev->data[data_prev_len], data_curr_shrink_len);
+                                       memcpy(&data_curr[data_curr_shrink_len],  chunk_curr->data, chunk_curr->data_len);
+                               }
+                               else {
+                                       BLI_assert(data_curr_len <= chunk_curr->data_len);
+                                       BLI_assert(data_prev_len >= chunk_prev->data_len);
+
+                                       const size_t data_prev_grow_len = data_prev_len - chunk_prev->data_len;
+
+                                       /* setup 'data_prev' */
+                                       memcpy(data_prev,                        chunk_prev->data, chunk_prev->data_len);
+                                       memcpy(&data_prev[chunk_prev->data_len], chunk_curr->data, data_prev_grow_len);
+
+                                       /* setup 'data_curr' */
+                                       memcpy(data_curr, &chunk_curr->data[data_prev_grow_len], data_curr_len);
+                               }
+
+                               cref->prev->link = bchunk_new(bs_mem, data_prev, data_prev_len);
+                               cref->prev->link->users += 1;
+
+                               cref->link = bchunk_new(bs_mem, data_curr, data_curr_len);
+                               cref->link->users += 1;
+                       }
+
+                       /* free zero users */
+                       bchunk_decref(bs_mem, chunk_curr);
+                       bchunk_decref(bs_mem, chunk_prev);
+               }
+       }
+}
+#endif  /* USE_MERGE_CHUNKS */
+
+/**
+ * Append and don't manage merging small chunks.
+ */
+static BChunk *bchunk_list_append_only(
+        BArrayMemory *bs_mem,
+        BChunkList *chunk_list, BChunk *chunk)
+{
+       BChunkRef *cref = BLI_mempool_alloc(bs_mem->chunk_ref);
+       BLI_addtail(&chunk_list->chunk_refs, cref);
+       cref->link = chunk;
+       chunk_list->chunk_refs_len += 1;
+       chunk->users += 1;
+       return chunk;
+}
+
+static void bchunk_list_append_data(
+        const BArrayInfo *info, BArrayMemory *bs_mem,
+        BChunkList *chunk_list,
+        const ubyte *data, const size_t data_len)
+{
+       BLI_assert(data_len != 0);
+       // printf("data_len: %d\n", data_len);
+#ifdef USE_MERGE_CHUNKS
+       if (!BLI_listbase_is_empty(&chunk_list->chunk_refs)) {
+               BChunkRef *cref = chunk_list->chunk_refs.last;
+               BChunk *chunk_prev = cref->link;
+
+               if (MIN2(chunk_prev->data_len, data_len) < info->chunk_byte_size_min) {
+                       const size_t data_merge_len = chunk_prev->data_len + data_len;
+                       /* realloc for single user */
+                       if (cref->link->users == 1) {
+                               ubyte *data_merge = MEM_reallocN((void *)cref->link->data, data_merge_len);
+                               memcpy(&data_merge[chunk_prev->data_len], data, data_len);
+                               cref->link->data     = data_merge;
+                               cref->link->data_len = data_merge_len;
+                       }
+                       else {
+                               ubyte *data_merge = MEM_mallocN(data_merge_len, __func__);
+                               memcpy(data_merge, chunk_prev->data, chunk_prev->data_len);
+                               memcpy(&data_merge[chunk_prev->data_len], data, data_len);
+                               cref->link = bchunk_new(bs_mem, data_merge, data_merge_len);
+                               cref->link->users += 1;
+                               bchunk_decref(bs_mem, chunk_prev);
+                       }
+                       return;
+               }
+       }
+#else
+       UNUSED_VARS(info);
+#endif  /* USE_MERGE_CHUNKS */
+
+       BChunk *chunk = bchunk_new_copydata(bs_mem, data, data_len);
+       bchunk_list_append_only(bs_mem, chunk_list, chunk);
+
+       /* don't run this, instead preemptively avoid creating a chunk only to merge it (above). */
+#if 0
+#ifdef USE_MERGE_CHUNKS
+       bchunk_list_ensure_min_size_last(info, bs_mem, chunk_list);
+#endif
+#endif
+}
+
+static void bchunk_list_append(
+        const BArrayInfo *info, BArrayMemory *bs_mem,
+        BChunkList *chunk_list,
+        BChunk *chunk)
+{
+       bchunk_list_append_only(bs_mem, chunk_list, chunk);
+
+#ifdef USE_MERGE_CHUNKS
+       bchunk_list_ensure_min_size_last(info, bs_mem, chunk_list);
+#else
+       UNUSED_VARS(info);
+#endif
+}
+
+static void bchunk_list_fill_from_array(
+        const BArrayInfo *info, BArrayMemory *bs_mem,
+        BChunkList *chunk_list,
+        const ubyte *data,
+        const size_t data_len)
+{
+       BLI_assert(BLI_listbase_is_empty(&chunk_list->chunk_refs));
+
+       size_t data_last_chunk_len = 0;
+       size_t data_trim_len = data_len;
+
+#ifdef USE_MERGE_CHUNKS
+       /* avoid creating too-small chunks
+        * more efficient than merging after */
+       if (data_len > info->chunk_byte_size) {
+               data_last_chunk_len = (data_trim_len % info->chunk_byte_size);
+               data_trim_len = data_trim_len - data_last_chunk_len;
+               if (data_last_chunk_len) {
+                       if (data_last_chunk_len < info->chunk_byte_size_min) {
+                               /* may be zero and that's OK */
+                               data_trim_len -= info->chunk_byte_size;
+                               data_last_chunk_len += info->chunk_byte_size;
+                       }
+               }
+       }
+       else {
+               data_trim_len = 0;
+               data_last_chunk_len = data_len;
+       }
+#else
+       data_last_chunk_len = (data_trim_len % info->chunk_byte_size);
+       data_trim_len = data_trim_len - data_last_chunk_len;
+#endif
+
+
+       BLI_assert(data_trim_len + data_last_chunk_len == data_len);
+
+       size_t i_prev = 0;
+       while (i_prev < data_trim_len) {
+               const size_t i = i_prev + info->chunk_byte_size;
+               BChunk *chunk = bchunk_new_copydata(bs_mem, &data[i_prev], i - i_prev);
+               bchunk_list_append_only(bs_mem, chunk_list, chunk);
+               i_prev = i;
+       }
+
+       if (data_last_chunk_len) {
+               BChunk *chunk = bchunk_new_copydata(bs_mem, &data[i_prev], data_last_chunk_len);
+               bchunk_list_append_only(bs_mem, chunk_list, chunk);
+               // i_prev = data_len;
+       }
+
+#ifdef USE_MERGE_CHUNKS
+       if (data_len > info->chunk_byte_size) {
+               BLI_assert(((BChunkRef *)chunk_list->chunk_refs.last)->link->data_len >= info->chunk_byte_size_min);
+       }
+#endif
+
+       /* works but better avoid redundant re-alloc */
+#if 0
+#ifdef USE_MERGE_CHUNKS
+       bchunk_list_ensure_min_size_last(info, bs_mem, chunk_list);
+#endif
+#endif
+
+       ASSERT_CHUNKLIST_SIZE(chunk_list, data_len);
+       ASSERT_CHUNKLIST_DATA(chunk_list, data);
+}
+
+
+/* ---------------------------------------------------------------------------
+ * Internal Table Lookup Functions
+ */
+
+/** \name Internal Hashing/De-Duplication API
+ *
+ * Only used by #bchunk_list_from_data_merge
+ * \{ */
+
+#define HASH_INIT (5381)
+
+BLI_INLINE uint hash_data_single(const ubyte p)
+{
+       return (HASH_INIT << 5) + HASH_INIT + (unsigned int)p;
+}
+
+/* hash bytes, from BLI_ghashutil_strhash_n */
+static uint hash_data(const ubyte *key, size_t n)
+{
+       const signed char *p;
+       unsigned int h = HASH_INIT;
+
+       for (p = (const signed char *)key; n--; p++) {
+               h = (h << 5) + h + (unsigned int)*p;
+       }
+
+       return h;
+}
+
+#undef HASH_INIT
+
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+static void hash_array_from_data(
+        const BArrayInfo *info, const ubyte *data_slice, const size_t data_slice_len,
+        hash_key *hash_array)
+{
+       if (info->chunk_stride != 1) {
+               for (size_t i = 0, i_step = 0; i_step < data_slice_len; i++, i_step += info->chunk_stride) {
+                       hash_array[i] = hash_data(&data_slice[i_step], info->chunk_stride);
+               }
+       }
+       else {
+               /* fast-path for bytes */
+               for (size_t i = 0; i < data_slice_len; i++) {
+                       hash_array[i] = hash_data_single(data_slice[i]);
+               }
+       }
+}
+
+/*
+ * Similar to hash_array_from_data,
+ * but able to step into the next chunk if we run out of data.
+ */
+static void hash_array_from_cref(
+        const BArrayInfo *info, const BChunkRef *cref, const size_t data_len,
+        hash_key *hash_array)
+{
+       const size_t hash_array_len = data_len / info->chunk_stride;
+       size_t i = 0;
+       do {
+               size_t i_next = hash_array_len - i;
+               size_t data_trim_len = i_next * info->chunk_stride;
+               if (data_trim_len > cref->link->data_len) {
+                       data_trim_len = cref->link->data_len;
+                       i_next = data_trim_len / info->chunk_stride;
+               }
+               BLI_assert(data_trim_len <= cref->link->data_len);
+               hash_array_from_data(info, cref->link->data, data_trim_len, &hash_array[i]);
+               i += i_next;
+               cref = cref->next;
+       } while ((i < hash_array_len) && (cref != NULL));
+
+       /* If this isn't equal, the caller didn't properly check
+        * that there was enough data left in all chunks */
+       BLI_assert(i == hash_array_len);
+}
+
+static void hash_accum(hash_key *hash_array, const size_t hash_array_len, size_t iter_steps)
+{
+       /* _very_ unlikely, can happen if you select a chunk-size of 1 for example. */
+       if (UNLIKELY((iter_steps > hash_array_len))) {
+               iter_steps = hash_array_len;
+       }
+
+       const size_t hash_array_search_len = hash_array_len - iter_steps;
+       while (iter_steps != 0) {
+               const size_t hash_offset = iter_steps;
+               for (uint i = 0; i < hash_array_search_len; i++) {
+                       hash_array[i] += (hash_array[i + hash_offset]) * ((hash_array[i] & 0xff) + 1);
+               }
+               iter_steps -= 1;
+       }
+}
+
+/**
+ * When we only need a single value, we can use a small optimization:
+ * skip accumulating a little more of the array's tail on each iteration.
+ */
+static void hash_accum_single(hash_key *hash_array, const size_t hash_array_len, size_t iter_steps)
+{
+       BLI_assert(iter_steps <= hash_array_len);
+       if (UNLIKELY(!(iter_steps <= hash_array_len))) {
+               /* while this shouldn't happen, avoid crashing */
+               iter_steps = hash_array_len;
+       }
+       /* We can increase this value each step to avoid accumulating quite as much
+        * while getting the same results as hash_accum */
+       size_t iter_steps_sub = iter_steps;
+
+       while (iter_steps != 0) {
+               const size_t hash_array_search_len = hash_array_len - iter_steps_sub;
+               const size_t hash_offset = iter_steps;
+               for (uint i = 0; i < hash_array_search_len; i++) {
+                       hash_array[i] += (hash_array[i + hash_offset]) * ((hash_array[i] & 0xff) + 1);
+               }
+               iter_steps -= 1;
+               iter_steps_sub += iter_steps;
+       }
+}
+
+static hash_key key_from_chunk_ref(
+        const BArrayInfo *info, const BChunkRef *cref,
+        /* avoid reallocating each time */
+        hash_key *hash_store, const size_t hash_store_len)
+{
+       /* will fill in a reusable array */
+       BChunk *chunk = cref->link;
+       BLI_assert(info->accum_read_ahead_bytes * info->chunk_stride);
+
+       if (info->accum_read_ahead_bytes <= chunk->data_len) {
+               hash_key key;
+
+#ifdef USE_HASH_TABLE_KEY_CACHE
+               key = chunk->key;
+               if (key != HASH_TABLE_KEY_UNSET) {
+                       /* Using key cache!
+                        * avoids calculating every time */
+               }
+               else {
+                       hash_array_from_cref(info, cref, info->accum_read_ahead_bytes, hash_store);
+                       hash_accum_single(hash_store, hash_store_len, info->accum_steps);
+                       key = hash_store[0];
+
+                       /* cache the key */
+                       if (key == HASH_TABLE_KEY_UNSET) {
+                               key = HASH_TABLE_KEY_FALLBACK;
+                       }
+                       chunk->key = key;
+               }
+#else
+               hash_array_from_cref(info, cref, info->accum_read_ahead_bytes, hash_store);
+               hash_accum_single(hash_store, hash_store_len, info->accum_steps);
+               key = hash_store[0];
+#endif
+               return key;
+       }
+       else {
+               /* corner case - we're too small, calculate the key each time. */
+
+               hash_array_from_cref(info, cref, info->accum_read_ahead_bytes, hash_store);
+               hash_accum_single(hash_store, hash_store_len, info->accum_steps);
+               hash_key key = hash_store[0];
+
+#ifdef USE_HASH_TABLE_KEY_CACHE
+               if (UNLIKELY(key == HASH_TABLE_KEY_UNSET)) {
+                       key = HASH_TABLE_KEY_FALLBACK;
+               }
+#endif
+               return key;
+       }
+}
+
+static const BChunkRef *table_lookup(
+        const BArrayInfo *info, BTableRef **table, const size_t table_len, const size_t i_table_start,
+        const ubyte *data, const size_t data_len, const size_t offset, const hash_key *table_hash_array)
+{
+       size_t size_left = data_len - offset;
+       hash_key key = table_hash_array[((offset - i_table_start) / info->chunk_stride)];
+       size_t key_index = (size_t)(key % (hash_key)table_len);
+       for (BTableRef *tref = table[key_index]; tref; tref = tref->next) {
+               const BChunkRef *cref = tref->cref;
+#ifdef USE_HASH_TABLE_KEY_CACHE
+               if (cref->link->key == key)
+#endif
+               {
+                       BChunk *chunk_test = cref->link;
+                       if (chunk_test->data_len <= size_left) {
+                               if (bchunk_data_compare(chunk_test, data, data_len, offset)) {
+                                       /* we could remove the chunk from the table, to avoid multiple hits */
+                                       return cref;
+                               }
+                       }
+               }
+       }
+       return NULL;
+}
+
+#else  /* USE_HASH_TABLE_ACCUMULATE */
+
+/* NON USE_HASH_TABLE_ACCUMULATE code (simply hash each chunk) */
+
+static hash_key key_from_chunk_ref(const BArrayInfo *info, const BChunkRef *cref)
+{
+       const size_t data_hash_len = BCHUNK_HASH_LEN * info->chunk_stride;
+       hash_key key;
+       BChunk *chunk = cref->link;
+
+#ifdef USE_HASH_TABLE_KEY_CACHE
+       key = chunk->key;
+       if (key != HASH_TABLE_KEY_UNSET) {
+               /* Using key cache!
+                * avoids calculating every time */
+       }
+       else {
+               /* cache the key */
+               key = hash_data(chunk->data, data_hash_len);
+               if (key == HASH_TABLE_KEY_UNSET) {
+                       key = HASH_TABLE_KEY_FALLBACK;
+               }
+               chunk->key = key;
+       }
+#else
+       key = hash_data(chunk->data, data_hash_len);
+#endif
+
+       return key;
+}
+
+static const BChunkRef *table_lookup(
+        const BArrayInfo *info, BTableRef **table, const size_t table_len, const uint UNUSED(i_table_start),
+        const ubyte *data, const size_t data_len, const size_t offset, const hash_key *UNUSED(table_hash_array))
+{
+       const size_t data_hash_len = BCHUNK_HASH_LEN * info->chunk_stride;  /* TODO, cache */
+
+       size_t size_left = data_len - offset;
+       hash_key key = hash_data(&data[offset], MIN2(data_hash_len, size_left));
+       size_t key_index = (size_t)(key % (hash_key)table_len);
+       for (BTableRef *tref = table[key_index]; tref; tref = tref->next) {
+               const BChunkRef *cref = tref->cref;
+#ifdef USE_HASH_TABLE_KEY_CACHE
+               if (cref->link->key == key)
+#endif
+               {
+                       BChunk *chunk_test = cref->link;
+                       if (chunk_test->data_len <= size_left) {
+                               if (bchunk_data_compare(chunk_test, data, data_len, offset)) {
+                                       /* we could remove the chunk from the table, to avoid multiple hits */
+                                       return cref;
+                               }
+                       }
+               }
+       }
+       return NULL;
+}
+
+#endif  /* USE_HASH_TABLE_ACCUMULATE */
+
+/* End Table Lookup
+ * ---------------- */
+
+/** \} */
+
+/**
+ * \param data: Data to store in the returned value.
+ * \param data_len_original: Length of data in bytes.
+ * \param chunk_list_reference: Reuse this list or chunks within it, don't modify its content.
+ * \note Caller is responsible for adding the user.
+ */
+static BChunkList *bchunk_list_from_data_merge(
+        const BArrayInfo *info, BArrayMemory *bs_mem,
+        const ubyte *data, const size_t data_len_original,
+        const BChunkList *chunk_list_reference)
+{
+       ASSERT_CHUNKLIST_SIZE(chunk_list_reference, chunk_list_reference->total_size);
+
+       /* -----------------------------------------------------------------------
+        * Fast-Path for exact match
+        * Check for an exact match; if so, return the current list.
+        */
+
+       const BChunkRef *cref_match_first = NULL;
+
+       uint chunk_list_reference_skip_len = 0;
+       size_t chunk_list_reference_skip_bytes = 0;
+       size_t i_prev = 0;
+
+#ifdef USE_FASTPATH_CHUNKS_FIRST
+       bool full_match = false;
+
+       {
+               full_match = true;
+
+               const BChunkRef *cref = chunk_list_reference->chunk_refs.first;
+               while (i_prev < data_len_original) {
+                       if (cref != NULL && bchunk_data_compare(cref->link, data, data_len_original, i_prev)) {
+                               cref_match_first = cref;
+                               chunk_list_reference_skip_len += 1;
+                               chunk_list_reference_skip_bytes += cref->link->data_len;
+                               i_prev += cref->link->data_len;
+                               cref = cref->next;
+                       }
+                       else {
+                               full_match = false;
+                               break;
+                       }
+               }
+
+               if (full_match) {
+                       if (chunk_list_reference->total_size == data_len_original) {
+                               return (BChunkList *)chunk_list_reference;
+                       }
+               }
+       }
+
+       /* End Fast-Path (first)
+        * --------------------- */
+
+#endif  /* USE_FASTPATH_CHUNKS_FIRST */
+
+       /* Copy until we have a mismatch */
+       BChunkList *chunk_list = bchunk_list_new(bs_mem, data_len_original);
+       if (cref_match_first != NULL) {
+               size_t chunk_size_step = 0;
+               const BChunkRef *cref = chunk_list_reference->chunk_refs.first;
+               while (true) {
+                       BChunk *chunk = cref->link;
+                       chunk_size_step += chunk->data_len;
+                       bchunk_list_append_only(bs_mem, chunk_list, chunk);
+                       ASSERT_CHUNKLIST_SIZE(chunk_list, chunk_size_step);
+                       ASSERT_CHUNKLIST_DATA(chunk_list, data);
+                       if (cref == cref_match_first) {
+                               break;
+                       }
+                       else {
+                               cref = cref->next;
+                       }
+               }
+               /* happens when bytes are removed from the end of the array */
+               if (chunk_size_step == data_len_original) {
+                       return chunk_list;
+               }
+
+               i_prev = chunk_size_step;
+       }
+       else {
+               i_prev = 0;
+       }
+
+       /* ------------------------------------------------------------------------
+        * Fast-Path for end chunks
+        *
+        * Check for trailing chunks
+        */
+
+       /* In this case use 'chunk_list_reference_last' to define the last index
+        * (index_match_last = -1). */
+
+       /* warning, from now on don't use len(data)
+        * since we want to ignore chunks already matched */
+       size_t data_len = data_len_original;
+#define data_len_original invalid_usage
+#ifdef data_len_original  /* quiet warning */
+#endif
+
+       const BChunkRef *chunk_list_reference_last = NULL;
+
+#ifdef USE_FASTPATH_CHUNKS_LAST
+       if (!BLI_listbase_is_empty(&chunk_list_reference->chunk_refs)) {
+               const BChunkRef *cref = chunk_list_reference->chunk_refs.last;
+               while ((cref->prev != NULL) &&
+                      (cref != cref_match_first) &&
+                      (cref->link->data_len <= data_len - i_prev))
+               {
+                       BChunk *chunk_test = cref->link;
+                       size_t offset = data_len - chunk_test->data_len;
+                       if (bchunk_data_compare(chunk_test, data, data_len, offset)) {
+                               data_len = offset;
+                               chunk_list_reference_last = cref;
+                               chunk_list_reference_skip_len += 1;
+                               chunk_list_reference_skip_bytes += cref->link->data_len;
+                               cref = cref->prev;
+                       }
+                       else {
+                               break;
+                       }
+               }
+       }
+
+       /* End Fast-Path (last)
+        * -------------------- */
+#endif  /* USE_FASTPATH_CHUNKS_LAST */
+
+       /* -----------------------------------------------------------------------
+        * Check for aligned chunks
+        *
+        * This saves a lot of searching, so use simple heuristics to detect aligned arrays.
+        * (may need to tweak exact method).
+        */
+
+       bool use_aligned = false;
+
+#ifdef USE_ALIGN_CHUNKS_TEST
+       if (chunk_list->total_size == chunk_list_reference->total_size) {
+               /* if we're already a quarter aligned */
+               if (data_len - i_prev <= chunk_list->total_size / 4) {
+                       use_aligned = true;
+               }
+               else {
+                       /* TODO, walk over chunks and check if some arbitrary amount align */
+               }
+       }
+#endif  /* USE_ALIGN_CHUNKS_TEST */
+
+       /* End Aligned Chunk Case
+        * ----------------------- */
+
+       if (use_aligned) {
+               /* Copy matching chunks, creating them using the same 'layout' as the reference. */
+               const BChunkRef *cref = cref_match_first ? cref_match_first->next : chunk_list_reference->chunk_refs.first;
+               while (i_prev != data_len) {
+                       const size_t i = i_prev + cref->link->data_len;
+                       BLI_assert(i != i_prev);
+
+                       if ((cref != chunk_list_reference_last) &&
+                           bchunk_data_compare(cref->link, data, data_len, i_prev))
+                       {
+                               bchunk_list_append(info, bs_mem, chunk_list, cref->link);
+                               ASSERT_CHUNKLIST_SIZE(chunk_list, i);
+                               ASSERT_CHUNKLIST_DATA(chunk_list, data);
+                       }
+                       else {
+                               bchunk_list_append_data(info, bs_mem, chunk_list, &data[i_prev], i - i_prev);
+                               ASSERT_CHUNKLIST_SIZE(chunk_list, i);
+                               ASSERT_CHUNKLIST_DATA(chunk_list, data);
+                       }
+
+                       cref = cref->next;
+
+                       i_prev = i;
+               }
+       }
+       else if ((data_len - i_prev >= info->chunk_byte_size) &&
+                (chunk_list_reference->chunk_refs_len >= chunk_list_reference_skip_len) &&
+                (chunk_list_reference->chunk_refs.first != NULL))
+       {
+
+               /* --------------------------------------------------------------------
+                * Non-Aligned Chunk De-Duplication */
+
+               /* only create a table if we have at least one chunk to search
+                * otherwise just make a new one.
+                *
+                * Support re-arranged chunks */
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+               size_t i_table_start = i_prev;
+               const size_t table_hash_array_len = (data_len - i_prev) / info->chunk_stride;
+               hash_key  *table_hash_array = MEM_mallocN(sizeof(*table_hash_array) * table_hash_array_len, __func__);
+               hash_array_from_data(info, &data[i_prev], data_len - i_prev, table_hash_array);
+
+               hash_accum(table_hash_array, table_hash_array_len, info->accum_steps);
+#else
+               /* dummy vars */
+               uint i_table_start = 0;
+               hash_key *table_hash_array = NULL;
+#endif
+
+               const size_t table_len = MAX2(1u, (((data_len - i_prev) / info->chunk_stride)) / BCHUNK_HASH_TABLE_DIV);
+               BTableRef **table = MEM_callocN(table_len * sizeof(*table), __func__);
+
+               const uint chunk_list_reference_remaining_len =
+                       (chunk_list_reference->chunk_refs_len - chunk_list_reference_skip_len) + 1;
+               BTableRef *table_ref_stack = MEM_mallocN(chunk_list_reference_remaining_len * sizeof(BTableRef), __func__);
+               uint       table_ref_stack_n = 0;
+
+               /* table_make - inline
+                * include one matching chunk, to allow for repeating values */
+               {
+#ifdef USE_HASH_TABLE_ACCUMULATE
+                       const size_t hash_store_len = info->accum_read_ahead_len;
+                       hash_key *hash_store = MEM_mallocN(sizeof(hash_key) * hash_store_len, __func__);
+#endif
+
+                       const BChunkRef *cref;
+                       size_t chunk_list_reference_bytes_remaining =
+                               chunk_list_reference->total_size - chunk_list_reference_skip_bytes;
+
+                       if (cref_match_first) {
+                               cref = cref_match_first;
+                               chunk_list_reference_bytes_remaining += cref->link->data_len;
+                       }
+                       else {
+                               cref = chunk_list_reference->chunk_refs.first;
+                       }
+
+#ifdef USE_PARANOID_CHECKS
+                       {
+                               size_t test_bytes_len = 0;
+                               const BChunkRef *cr = cref;
+                               while (cr != chunk_list_reference_last) {
+                                       test_bytes_len += cr->link->data_len;
+                                       cr = cr->next;
+                               }
+                               BLI_assert(test_bytes_len == chunk_list_reference_bytes_remaining);
+                       }
+#endif
+
+                       while ((cref != chunk_list_reference_last) &&
+                              (chunk_list_reference_bytes_remaining >= info->accum_read_ahead_bytes))
+                       {
+                               hash_key key = key_from_chunk_ref(info, cref
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+                                                                 , hash_store, hash_store_len
+#endif
+                                                                 );
+                               size_t key_index = (size_t)(key % (hash_key)table_len);
+                               BTableRef *tref_prev = table[key_index];
+                               BLI_assert(table_ref_stack_n < chunk_list_reference_remaining_len);
+                               BTableRef *tref = &table_ref_stack[table_ref_stack_n++];
+                               tref->cref = cref;
+                               tref->next = tref_prev;
+                               table[key_index] = tref;
+
+                               chunk_list_reference_bytes_remaining -= cref->link->data_len;
+                               cref = cref->next;
+                       }
+
+                       BLI_assert(table_ref_stack_n <= chunk_list_reference_remaining_len);
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+                       MEM_freeN(hash_store);
+#endif
+               }
+               /* done making the table */
+
+               BLI_assert(i_prev <= data_len);
+               for (size_t i = i_prev; i < data_len; ) {
+                       /* Assumes the existing chunk isn't a match! */
+
+                       const BChunkRef *cref_found = table_lookup(
+                               info,
+                               table, table_len, i_table_start,
+                               data, data_len, i, table_hash_array);
+                       if (cref_found != NULL) {
+                               BLI_assert(i < data_len);
+                               if (i != i_prev) {
+                                       size_t i_step = MIN2(i_prev + info->chunk_byte_size, data_len);
+                                       BLI_assert(i_step <= data_len);
+
+                                       while (i_prev != i) {
+                                               i_step = MIN2(i_step, i);
+                                               const ubyte  *data_slice = &data[i_prev];
+                                               const size_t  data_slice_len = i_step - i_prev;
+                                               /* First add all previous chunks! */
+                                               i_prev += data_slice_len;
+                                               bchunk_list_append_data(info, bs_mem, chunk_list, data_slice, data_slice_len);
+                                               BLI_assert(i_prev <= data_len);
+                                               ASSERT_CHUNKLIST_SIZE(chunk_list, i_prev);
+                                               ASSERT_CHUNKLIST_DATA(chunk_list, data);
+                                               i_step += info->chunk_byte_size;
+                                       }
+                               }
+
+                               /* now add the reference chunk */
+                               {
+                                       BChunk *chunk_found = cref_found->link;
+                                       i += chunk_found->data_len;
+                                       bchunk_list_append(info, bs_mem, chunk_list, chunk_found);
+                               }
+                               i_prev = i;
+                               BLI_assert(i_prev <= data_len);
+                               ASSERT_CHUNKLIST_SIZE(chunk_list, i_prev);
+                               ASSERT_CHUNKLIST_DATA(chunk_list, data);
+
+                               /* it's likely that the next chunk in the list will be a match, so check it! */
+                               while ((cref_found->next != NULL) &&
+                                      (cref_found->next != chunk_list_reference_last))
+                               {
+                                       cref_found = cref_found->next;
+                                       BChunk *chunk_found = cref_found->link;
+
+                                       if (bchunk_data_compare(chunk_found, data, data_len, i_prev)) {
+                                               /* may be useful to remove table data, assuming we don't have repeating memory
+                                                * where it would be useful to re-use chunks. */
+                                               i += chunk_found->data_len;
+                                               bchunk_list_append(info, bs_mem, chunk_list, chunk_found);
+                                               /* chunk_found may be freed! */
+                                               i_prev = i;
+                                               BLI_assert(i_prev <= data_len);
+                                               ASSERT_CHUNKLIST_SIZE(chunk_list, i_prev);
+                                               ASSERT_CHUNKLIST_DATA(chunk_list, data);
+                                       }
+                                       else {
+                                               break;
+                                       }
+                               }
+                       }
+                       else {
+                               i = i + info->chunk_stride;
+                       }
+               }
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+               MEM_freeN(table_hash_array);
+#endif
+               MEM_freeN(table);
+               MEM_freeN(table_ref_stack);
+
+               /* End Table Lookup
+                * ---------------- */
+       }
+
+       ASSERT_CHUNKLIST_SIZE(chunk_list, i_prev);
+       ASSERT_CHUNKLIST_DATA(chunk_list, data);
+
+       /* -----------------------------------------------------------------------
+        * No Duplicates to copy, write new chunks
+        *
+        * Trailing chunks, no matches found in table lookup above.
+        * Write all new data. */
+       BLI_assert(i_prev <= data_len);
+       while (i_prev != data_len) {
+               size_t i = i_prev + info->chunk_byte_size;
+               i = MIN2(i, data_len);
+               BLI_assert(i != i_prev);
+               bchunk_list_append_data(info, bs_mem, chunk_list, &data[i_prev], i - i_prev);
+               ASSERT_CHUNKLIST_DATA(chunk_list, data);
+               i_prev = i;
+       }
+
+       BLI_assert(i_prev == data_len);
+
+#ifdef USE_FASTPATH_CHUNKS_LAST
+       if (chunk_list_reference_last != NULL) {
+               /* write chunk_list_reference_last since it hasn't been written yet */
+               const BChunkRef *cref = chunk_list_reference_last;
+               while (cref != NULL) {
+                       BChunk *chunk = cref->link;
+                       // BLI_assert(bchunk_data_compare(chunk, data, data_len, i_prev));
+                       i_prev += chunk->data_len;
+                       /* use a simple append since we assume the referenced chunks have already been sized correctly. */
+                       bchunk_list_append_only(bs_mem, chunk_list, chunk);
+                       ASSERT_CHUNKLIST_DATA(chunk_list, data);
+                       cref = cref->next;
+               }
+       }
+#endif
+
+#undef data_len_original
+
+       BLI_assert(i_prev == data_len_original);
+
+       /* check we're the correct size and that we didn't accidentally modify the reference */
+       ASSERT_CHUNKLIST_SIZE(chunk_list, data_len_original);
+       ASSERT_CHUNKLIST_SIZE(chunk_list_reference, chunk_list_reference->total_size);
+
+       ASSERT_CHUNKLIST_DATA(chunk_list, data);
+
+       return chunk_list;
+}
+/* end private API */
+
+/** \} */
+
+
+/** \name Main Array Storage API
+ * \{ */
+
+
+/**
+ * Create a new array store, which can store any number of arrays
+ * as long as their stride matches.
+ *
+ * \param stride: ``sizeof()`` each element,
+ *
+ * \note while a stride of ``1`` will always work,
+ * it's less efficient since duplicate chunks of memory will be searched
+ * at positions unaligned with the array data.
+ *
+ * \param chunk_count: Number of elements to split each chunk into.
+ * - A small value increases the ability to de-duplicate chunks,
+ *   but adds overhead by increasing the number of chunks to look-up when searching for duplicates,
+ *   as well as some overhead constructing the original array again, with more calls to ``memcpy``.
+ * - Larger values reduce the *book keeping* overhead,
+ *   but increase the chance a small, isolated change will cause a larger amount of data to be duplicated.
+ *
+ * \return A new array store, to be freed with #BLI_array_store_destroy.
+ */
+BArrayStore *BLI_array_store_create(
+        uint stride,
+        uint chunk_count)
+{
+       BArrayStore *bs = MEM_callocN(sizeof(BArrayStore), __func__);
+
+       bs->info.chunk_stride = stride;
+       bs->info.chunk_count = chunk_count;
+
+       bs->info.chunk_byte_size = chunk_count * stride;
+#ifdef USE_MERGE_CHUNKS
+       bs->info.chunk_byte_size_min = MAX2(1u, chunk_count / BCHUNK_SIZE_MIN_DIV) * stride;
+#endif
+
+#ifdef USE_HASH_TABLE_ACCUMULATE
+       bs->info.accum_steps = BCHUNK_HASH_TABLE_ACCUMULATE_STEPS - 1;
+	/* Triangle number, identifying how much read-ahead we need:
+	 * https://en.wikipedia.org/wiki/Triangular_number (+ 1)
+	 * e.g. with 3 accumulate steps, the read-ahead is ((3 * 4) / 2) + 1 = 7 elements. */
+       bs->info.accum_read_ahead_len   = (uint)((((bs->info.accum_steps * (bs->info.accum_steps + 1))) / 2) + 1);
+       bs->info.accum_read_ahead_bytes = bs->info.accum_read_ahead_len * stride;
+#else
+	bs->info.accum_read_ahead_bytes = BCHUNK_HASH_LEN * stride;
+#endif
+
+       bs->memory.chunk_list   = BLI_mempool_create(sizeof(BChunkList), 0, 512, BLI_MEMPOOL_NOP);
+       bs->memory.chunk_ref    = BLI_mempool_create(sizeof(BChunkRef),  0, 512, BLI_MEMPOOL_NOP);
+	/* allow iteration to simplify freeing, otherwise it's not needed
+	 * (we could loop over all states as an alternative). */
+       bs->memory.chunk        = BLI_mempool_create(sizeof(BChunk),     0, 512, BLI_MEMPOOL_ALLOW_ITER);
+
+       return bs;
+}
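+
+/* Example usage (an illustrative sketch only, not compiled here;
+ * the stride and chunk-count values below are arbitrary assumptions):
+ *
+ *     // store arrays of 3D float coordinates, grouping 32 elements per chunk
+ *     BArrayStore *bs = BLI_array_store_create(sizeof(float[3]), 32);
+ *     // ... add, read back and remove states ...
+ *     BLI_array_store_destroy(bs);
+ */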
+
+static void array_store_free_data(BArrayStore *bs)
+{
+       /* free chunk data */
+       {
+               BLI_mempool_iter iter;
+               BChunk *chunk;
+               BLI_mempool_iternew(bs->memory.chunk, &iter);
+               while ((chunk = BLI_mempool_iterstep(&iter))) {
+                       BLI_assert(chunk->users > 0);
+                       MEM_freeN((void *)chunk->data);
+               }
+       }
+
+       /* free states */
+       for (BArrayState *state = bs->states.first, *state_next; state; state = state_next) {
+               state_next = state->next;
+               MEM_freeN(state);
+       }
+}
+
+/**
+ * Free the #BArrayStore, including all states and chunks.
+ */
+void BLI_array_store_destroy(
+        BArrayStore *bs)
+{
+       array_store_free_data(bs);
+
+       BLI_mempool_destroy(bs->memory.chunk_list);
+       BLI_mempool_destroy(bs->memory.chunk_ref);
+       BLI_mempool_destroy(bs->memory.chunk);
+
+       MEM_freeN(bs);
+}
+
+/**
+ * Clear all contents, allowing reuse of \a bs.
+ */
+void BLI_array_store_clear(
+        BArrayStore *bs)
+{
+       array_store_free_data(bs);
+
+       BLI_listbase_clear(&bs->states);
+
+       BLI_mempool_clear(bs->memory.chunk_list);
+       BLI_mempool_clear(bs->memory.chunk_ref);
+       BLI_mempool_clear(bs->memory.chunk);
+}
+
+/** \name BArrayStore Statistics
+ * \{ */
+
+/**
+ * \return the total amount of memory that would be used by expanding the arrays of all states.
+ */
+size_t BLI_array_store_calc_size_expanded_get(
+        const BArrayStore *bs)
+{
+       size_t size_accum = 0;
+       for (const BArrayState *state = bs->states.first; state; state = state->next) {
+               size_accum += state->chunk_list->total_size;
+       }
+       return size_accum;
+}
+
+/**
+ * \return the amount of memory used by all #BChunk.data
+ * (duplicate chunks are only counted once).
+ */
+size_t BLI_array_store_calc_size_compacted_get(
+        const BArrayStore *bs)
+{
+       size_t size_total = 0;
+       BLI_mempool_iter iter;
+       BChunk *chunk;
+       BLI_mempool_iternew(bs->memory.chunk, &iter);
+       while ((chunk = BLI_mempool_iterstep(&iter))) {
+               BLI_assert(chunk->users > 0);
+               size_total += (size_t)chunk->data_len;
+       }
+       return size_total;
+}
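+
+/* Example (a sketch; assumes `bs` is an existing, populated store):
+ * comparing the expanded and compacted sizes shows how well de-duplication is working.
+ *
+ *     const size_t size_expanded  = BLI_array_store_calc_size_expanded_get(bs);
+ *     const size_t size_compacted = BLI_array_store_calc_size_compacted_get(bs);
+ *     // ratio of real memory used to the memory all states would need when expanded
+ *     const double ratio = size_expanded ? ((double)size_compacted / (double)size_expanded) : 1.0;
+ */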
+
+/** \} */
+
+
+/** \name BArrayState Access
+ * \{ */
+
+/**
+ * Add a new state to the array store.
+ *
+ * \param data: Data used to create the new state.
+ * \param state_reference: The state to use as a reference when adding the new state,
+ * typically this is the previous state,
+ * however it can be any previously created state from this \a bs.
+ *
+ * \return The new state, which is used by the caller as a handle to get back the contents of \a data.
+ * This may be removed using #BLI_array_store_state_remove,
+ * otherwise it will be removed with #BLI_array_store_destroy.
+ */
+BArrayState *BLI_array_store_state_add(
+        BArrayStore *bs,
+        const void *data, const size_t data_len,
+        const BArrayState *state_reference)
+{
+	/* ensure we're aligned to the stride */
+       BLI_assert((data_len % bs->info.chunk_stride) == 0);
+
+#ifdef USE_PARANOID_CHECKS
+       if (state_reference) {
+               BLI_assert(BLI_findindex(&bs->states, state_reference) != -1);
+       }
+#endif
+
+	BChunkList *chunk_list;
+       if (state_reference) {
+               chunk_list = bchunk_list_from_data_merge(
+                       &bs->info, &bs->memory,
+                       (const ubyte *)data, data_len,
+                       /* re-use reference chunks */
+                       state_reference->chunk_list);
+       }
+       else {
+               chunk_list = bchunk_list_new(&bs->memory, data_len);
+               bchunk_list_fill_from_array(
+                       &bs->info, &bs->memory,
+                       chunk_list,
+                       (const ubyte *)data, data_len);
+       }
+
+       chunk_list->users += 1;
+
+       BArrayState *state = MEM_callocN(sizeof(BArrayState), __func__);
+       state->chunk_list = chunk_list;
+
+       BLI_addtail(&bs->states, state);
+
+#ifdef USE_PARANOID_CHECKS
+       {
+               size_t data_test_len;
+               void *data_test = BLI_array_store_state_data_get_alloc(state, &data_test_len);
+               BLI_assert(data_test_len == data_len);
+               BLI_assert(memcmp(data_test, data, data_len) == 0);
+               MEM_freeN(data_test);
+       }
+#endif
+
+       return state;
+}
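+
+/* Example (a sketch; `bs`, `data_a`/`data_b` and their lengths are assumed to exist,
+ * with lengths that are multiples of the store's stride):
+ * passing the previous state as a reference lets unchanged chunks be shared.
+ *
+ *     BArrayState *state_a = BLI_array_store_state_add(bs, data_a, data_a_len, NULL);
+ *     // chunks of `data_b` that match `state_a` are re-used instead of copied
+ *     BArrayState *state_b = BLI_array_store_state_add(bs, data_b, data_b_len, state_a);
+ */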
+
+/**
+ * Remove a state and free any unused #BChunk data.
+ *
+ * The states can be freed in any order.
+ */
+void BLI_array_store_state_remove(
+        BArrayStore *bs,
+        BArrayState *state)
+{
+#ifdef USE_PARANOID_CHECKS
+       BLI_assert(BLI_findindex(&bs->states, state) != -1);
+#endif
+
+       bchunk_list_decref(&bs->memory, state->chunk_list);
+       BLI_remlink(&bs->states, state);
+
+       MEM_freeN(state);
+}
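+
+/* Example (a sketch, continuing the hypothetical names above):
+ * states may be removed in any order; chunks shared with remaining states stay valid.
+ *
+ *     // removing the reference state first is fine, `state_b` keeps the shared chunks alive
+ *     BLI_array_store_state_remove(bs, state_a);
+ *     BLI_array_store_state_remove(bs, state_b);
+ */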
+
+/**
+ * \return the expanded size of the array,
+ * use this to know how much memory to allocate for #BLI_array_store_state_data_get's argument.
+ */
+size_t BLI_array_store_state_size_get(
+        BArrayState *state)
+{
+       return state->chunk_list->total_size;
+}
+
+/**
+ * Fill in existing allocated memory with the contents of \a state.
+ */
+void BLI_array_store_state_data_get(
+        BArrayState *state,
+        void *data)
+{
+#ifdef USE_PARANOID_CHECKS
+       size_t data_test_len = 0;
+       for (BChunkRef *cref = state->chunk_list->chunk_refs.first; cref; cref = cref->next) {
+               data_test_len += cref->link->data_len;
+       }
+       BLI_assert(data_test_len == state->chunk_list->total_size);
+#endif
+
+       ubyte *data_step = (ubyte *)data;
+       for (BChunkRef *cref = state->chunk_list->chunk_refs.first; cref; cref = cref->next) {
+               BLI_assert(cref->link->users > 0);
+               memcpy(data_step, cref->link->data, cref->link->data_len);
+               data_step += cref->link->data_len;
+       }
+}
+
+/**
+ * Allocate an array for \a state and return it.
+ */
+void *BLI_array_store_state_data_get_alloc(
+        BArrayState *state,
+        size_t *r_data_len)
+{
+       void *data = MEM_mallocN(state->chunk_list->total_size, __func__);
+       BLI_array_store_state_data_get(state, data);
+       *r_data_len = state->chunk_list->total_size;
+       return data;
+}
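+
+/* Example (a sketch; `state` is assumed to be a state returned by #BLI_array_store_state_add):
+ * expanding a state back into a caller-allocated buffer.
+ *
+ *     const size_t data_len = BLI_array_store_state_size_get(state);
+ *     void *data = MEM_mallocN(data_len, __func__);
+ *     BLI_array_store_state_data_get(state, data);
+ *     // ... use the expanded array ...
+ *     MEM_freeN(data);
+ */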
+
+/** \} */
+
+
+/** \name Debugging API (for testing).
+ * \{ */
+
+/* only for test validation */
+static size_t bchunk_list_size(const BChunkList *chunk_list)
+{
+       size_t total_size = 0;
+       for (BChunkRef *cref = chunk_list->chunk_refs.first; cref; cref = cref->next) {
+               total_size += cref->link->data_len;
+       }
+       return total_size;
+}
+
+bool BLI_array_store_is_valid(
+        BArrayStore *bs)
+{
+       bool ok = true;
+
+       /* Check Length
+        * ------------ */
+
+       for (BArrayState *state = bs->states.first; state; state = state->next) {
+               BChunkList *chunk_list = state->chunk_list;
+               if (!(bchunk_list_size(chunk_list) == chunk_list->total_size)) {
+                       return false;
+               }
+       }
+
+       {
+               BLI_mempool_iter iter;
+               BChunk *chunk;
+               BLI_mempool_iternew(bs->memory.chunk, &iter);
+               while ((chunk = BLI_mempool_iterstep(&iter))) {
+                       if (!(MEM_allocN_len(chunk->data) >= chunk->data_len)) {
+                               return false;
+                       }
+               }
+       }
+
+       /* Check User Count & Lost References
+        * ---------------------------------- */
+       {
+               GHashIterator gh_iter;
+
+#define GHASH_PTR_ADD_USER(gh, pt) \
+       { \
+               void **val; \
+               if (BLI_ghash_ensure_p((gh), (pt), &val)) { \
+                       *((int *)val) += 1; \
+               } \
+               else { \
+                       *((int *)val) = 1; \
+               } \
+       } ((void)0)
+
+
+               /* count chunk_list's */
+		int totrefs = 0;
+		GHash *chunk_list_map = BLI_ghash_ptr_new(__func__);
+		/* allocate early so the error path below can always free it */
+		GHash *chunk_map = BLI_ghash_ptr_new(__func__);
+               for (BArrayState *state = bs->states.first; state; state = state->next) {
+                       GHASH_PTR_ADD_USER(chunk_list_map, state->chunk_list);
+               }
+               GHASH_ITER (gh_iter, chunk_list_map) {
+                       const struct BChunkList *chunk_list = BLI_ghashIterator_getKey(&gh_iter);
+                       const int users =  GET_INT_FROM_POINTER(BLI_ghashIterator_getValue(&gh_iter));
+                       if (!(chunk_list->users == users)) {
+                               ok = false;
+                               goto user_finally;
+                       }
+               }
+               if (!(BLI_mempool_count(bs->memory.chunk_list) == (int)BLI_ghash_size(chunk_list_map))) {
+                       ok = false;
+                       goto user_finally;
+               }
+
+		/* count chunk's */
+               GHASH_ITER (gh_iter, chunk_list_map) {
+                       const struct BChunkList *chunk_list = BLI_ghashIterator_getKey(&gh_iter);
+                       for (const BChunkRef *cref = chunk_list->chunk_refs.first; cref; cref = cref->next) {
+                               GHASH_PTR_ADD_USER(chunk_map, cref->link);
+                               totrefs += 1;
+                       }
+               }
+               if (!(BLI_mempool_count(bs->memory.chunk) == (int)BLI_ghash_size(chunk_map))) {
+                       ok = false;
+                       goto user_finally;
+               }
+               if (!(BLI_mempool_count(bs->memory.chunk_ref) == totrefs)) {
+                       ok = false;
+                       goto user_finally;
+               }
+
+               GHASH_ITER (gh_iter, chunk_map) {
+                       const struct BChunk *chunk = BLI_ghashIterator_getKey(&gh_iter);
+                       const int users =  GET_INT_FROM_POINTER(BLI_ghashIterator_getValue(&gh_iter));
+                       if (!(chunk->users == users)) {
+                               ok = false;
+                               goto user_finally;
+                       }
+               }
+
+#undef GHASH_PTR_ADD_USER
+
+user_finally:
+               BLI_ghash_free(chunk_list_map, NULL, NULL);
+               BLI_ghash_free(chunk_map, NULL, NULL);
+       }
+
+
+       return ok;
+       /* TODO, dangling pointer checks */
+}
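+
+/* Example (a sketch, intended only for tests; `bs` is assumed to exist):
+ *
+ *     BLI_assert(BLI_array_store_is_valid(bs));
+ */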
+
+/** \} */