Tools: Add a 'raw block' support/mode to blendfile module and blend2json tool.

This much more basic representation of a blendfile is designed to
survive badly corrupted data, e.g. when handling files without DNA
info, etc.

Obviously the amount of data extracted is much smaller, but it is still
easier to analyze than raw binary data.
This commit is contained in:
Bastien Montagne
2024-02-19 15:01:06 +01:00
parent b31e6bd93e
commit 6f74bc5c05
2 changed files with 274 additions and 58 deletions


@@ -25,54 +25,6 @@ class BlendFileError(Exception):
"""Raised when there was an error reading/parsing a blend file."""
# -----------------------------------------------------------------------------
# module global routines
#
# read routines
# open a filename
# determine if the file is compressed
# and returns a handle
def open_blend(filename, access="rb"):
"""Opens a blend file for reading or writing pending on the access
supports 2 kind of blend files. Uncompressed and compressed.
Known issue: does not support packaged blend files
"""
handle = open(filename, access)
magic_test = b"BLENDER"
magic = handle.read(len(magic_test))
if magic == magic_test:
log.debug("normal blendfile detected")
handle.seek(0, os.SEEK_SET)
bfile = BlendFile(handle)
bfile.is_compressed = False
bfile.filepath_orig = filename
return bfile
elif magic[:2] == b'\x1f\x8b':
log.debug("gzip blendfile detected")
handle.close()
log.debug("decompressing started")
fs = gzip.open(filename, "rb")
data = fs.read(FILE_BUFFER_SIZE)
magic = data[:len(magic_test)]
if magic == magic_test:
handle = tempfile.TemporaryFile()
while data:
handle.write(data)
data = fs.read(FILE_BUFFER_SIZE)
log.debug("decompressing finished")
fs.close()
log.debug("resetting decompressed file")
handle.seek(0, os.SEEK_SET)
bfile = BlendFile(handle)
bfile.is_compressed = True
bfile.filepath_orig = filename
return bfile
else:
raise BlendFileError("filetype inside gzip not a blend")
else:
raise BlendFileError("filetype not a blend or a gzip blend")
def pad_up_4(offset):
return (offset + 3) & ~3
@@ -493,7 +445,7 @@ class BlendFileBlock:
for k in self.keys():
yield from self.get_recursive_iter(k, use_nil=use_nil, use_str=False)
def get_data_hash(self):
def get_data_hash(self, seed=1):
"""
Generates a 'hash' that can be used instead of addr_old as block id, and that should be 'stable' across .blend
file load & save (i.e. it does not change due to pointer address variations).
@@ -506,7 +458,7 @@ class BlendFileBlock:
return self.file.structs[self.sdna_index].field_from_path(
self.file.header, self.file.handle, k).dna_name.is_pointer
hsh = 1
hsh = seed
for k, v in self.items_recursive_iter():
if not _is_pointer(self, k):
hsh = zlib.adler32(str(v).encode(), hsh)
@@ -586,6 +538,198 @@ class BlendFileBlock:
yield (k, "<%s>" % dna_type.dna_type_id.decode('ascii'))
########################################################################################################################
# Way more basic access to blendfile data, without any DNA handling.
class BlendFileRaw:
"""
Blend file, at a very low level (only a collection of blocks). Can survive opening e.g. blend files without DNA info.
"""
__slots__ = (
# file (result of open())
"handle",
# str (original name of the file path)
"filepath_orig",
# BlendFileHeader
"header",
# struct.Struct
"block_header_struct",
# list of BlendFileBlockRaw
"blocks",
# dict {addr_old: block}
"block_from_offset",
# dict {block code: [blocks]}
"code_index",
# bool (did we make a change)
"is_modified",
# bool (is file gzipped)
"is_compressed",
)
def __init__(self, handle):
log.debug("initializing reading blend-file")
self.handle = handle
self.header = BlendFileHeader(handle)
self.block_header_struct = self.header.create_block_header_struct()
self.blocks = []
self.code_index = {}
block = BlendFileBlockRaw(handle, self)
while block.code != b'ENDB':
handle.seek(block.size, os.SEEK_CUR)
self.blocks.append(block)
self.code_index.setdefault(block.code, []).append(block)
block = BlendFileBlockRaw(handle, self)
self.is_modified = False
self.blocks.append(block)
# Cache (could lazy init, in case we never use?).
self.block_from_offset = {block.addr_old: block for block in self.blocks if block.code != b'ENDB'}
def __repr__(self):
return '<%s %r>' % (self.__class__.__qualname__, self.handle)
def __enter__(self):
return self
def __exit__(self, type, value, traceback):
self.close()
def find_blocks_from_code(self, code):
assert type(code) == bytes
if code not in self.code_index:
return []
return self.code_index[code]
def find_block_from_offset(self, offset):
# same as looping over all blocks,
# then checking `block.addr_old == offset`.
assert type(offset) is int
return self.block_from_offset.get(offset)
def close(self):
"""
Close the blend file.
Writes the blend file to disk if changes have happened.
"""
handle = self.handle
if self.is_modified:
if self.is_compressed:
log.debug("close compressed blend file")
handle.seek(0, os.SEEK_SET)
log.debug("compressing started")
fs = gzip.open(self.filepath_orig, "wb")
data = handle.read(FILE_BUFFER_SIZE)
while data:
fs.write(data)
data = handle.read(FILE_BUFFER_SIZE)
fs.close()
log.debug("compressing finished")
handle.close()
def ensure_subtype_smaller(self, sdna_index_curr, sdna_index_next):
# NOTE: relies on DNA structs, which BlendFileRaw does not read;
# calling this on a raw file will fail.
# never refine to a smaller type
if (self.structs[sdna_index_curr].size >
self.structs[sdna_index_next].size):
raise RuntimeError("can't refine to smaller type (%s -> %s)" %
(self.structs[sdna_index_curr].dna_type_id.decode('ascii'),
self.structs[sdna_index_next].dna_type_id.decode('ascii')))
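As a usage sketch (the helper is hypothetical, and assumes `blend` is an already-open BlendFileRaw), the two lookup methods above can be combined like this:

def list_blocks_by_code(blend, code=b'SC'):
    # `blend` is an open BlendFileRaw; b'SC' is the code of scene blocks.
    for block in blend.find_blocks_from_code(code):
        # The offset index round-trips to the very same block object.
        assert blend.find_block_from_offset(block.addr_old) is block
        print(block.code, block.size, hex(block.addr_old))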
class BlendFileBlockRaw:
"""
Instance of a raw blendfile block (only contains its header currently).
"""
__slots__ = (
# BlendFileRaw
"file",
"code",
"size",
"addr_old",
"sdna_index",
"count",
"file_offset",
"user_data",
)
def __str__(self):
# Raw blocks carry no DNA type info, so only header fields are shown.
return ("<%s (%s), size=%d at %s>" %
(self.__class__.__name__,
self.code.decode(),
self.size,
hex(self.addr_old),
))
def __init__(self, handle, bfile):
OLDBLOCK = struct.Struct(b'4sI')
self.file = bfile
self.user_data = None
data = handle.read(bfile.block_header_struct.size)
if len(data) != bfile.block_header_struct.size:
print("WARNING! Blend file seems to be badly truncated!")
self.code = b'ENDB'
self.size = 0
self.addr_old = 0
self.sdna_index = 0
self.count = 0
self.file_offset = 0
return
# header size can be 8, 20, or 24 bytes long
# 8: old blend files ENDB block (exception)
# 20: normal headers 32 bit platform
# 24: normal headers 64 bit platform
if len(data) > 15:
blockheader = bfile.block_header_struct.unpack(data)
self.code = blockheader[0].partition(b'\0')[0]
if self.code != b'ENDB':
self.size = blockheader[1]
self.addr_old = blockheader[2]
self.sdna_index = blockheader[3]
self.count = blockheader[4]
self.file_offset = handle.tell()
else:
self.size = 0
self.addr_old = 0
self.sdna_index = 0
self.count = 0
self.file_offset = 0
else:
blockheader = OLDBLOCK.unpack(data)
self.code = DNA_IO.read_data0(blockheader[0])
self.size = 0
self.addr_old = 0
self.sdna_index = 0
self.count = 0
self.file_offset = 0
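For reference, a standalone sketch of what the 24-byte (64-bit) header unpack above amounts to; the real struct is built from the file header's pointer size and endianness, little-endian is assumed here:

import struct

# 64-bit little-endian block header, 24 bytes total:
# code (4 bytes), size (uint32), old pointer address (uint64),
# SDNA index (uint32), count (uint32).
BLOCK_HEADER_64_LE = struct.Struct(b'<4sIQII')

def unpack_block_header(data):
    code, size, addr_old, sdna_index, count = BLOCK_HEADER_64_LE.unpack(data)
    return code.partition(b'\0')[0], size, addr_old, sdna_index, count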
def get_data_hash(self, seed=1):
"""
Generates a 'hash' that can be used instead of addr_old as block id, and that should be 'stable' across .blend
file load & save (i.e. it does not change due to pointer address variations).
"""
# TODO This implementation is most likely far from optimal... and Adler-32 is not renowned as the best hashing
# algorithm either. But for now it does the job!
import zlib
hsh = seed
hsh = zlib.adler32(str(self.code).encode(), hsh)
hsh = zlib.adler32(str(self.size).encode(), hsh)
hsh = zlib.adler32(str(self.sdna_index).encode(), hsh)
hsh = zlib.adler32(str(self.count).encode(), hsh)
return hsh
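The chained Adler-32 above boils down to the following (field values invented; the real code stringifies each header field first):

import zlib

hsh = 1  # the seed
for field in (b'DATA', b'256', b'0', b'1'):
    # Feed each field in with the running value as the start value, so
    # identical fields (and an identical seed) always produce the same id.
    hsh = zlib.adler32(field, hsh)
print(hex(hsh))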
# -----------------------------------------------------------------------------
# Read Magic
#
@@ -1022,3 +1166,51 @@ class DNA_IO:
if header.pointer_size == 8:
st = DNA_IO.ULONG[header.endian_index]
return st.unpack(handle.read(st.size))[0]
# -----------------------------------------------------------------------------
# module global routines
#
# read routines
# open a filename
# determine if the file is compressed
# and returns a handle
def open_blend(filename, access="rb", wrapper_type=BlendFile):
"""Opens a blend file for reading or writing pending on the access
supports 2 kind of blend files. Uncompressed and compressed.
Known issue: does not support packaged blend files
"""
handle = open(filename, access)
magic_test = b"BLENDER"
magic = handle.read(len(magic_test))
if magic == magic_test:
log.debug("normal blendfile detected")
handle.seek(0, os.SEEK_SET)
bfile = wrapper_type(handle)
bfile.is_compressed = False
bfile.filepath_orig = filename
return bfile
elif magic[:2] == b'\x1f\x8b':
log.debug("gzip blendfile detected")
handle.close()
log.debug("decompressing started")
fs = gzip.open(filename, "rb")
data = fs.read(FILE_BUFFER_SIZE)
magic = data[:len(magic_test)]
if magic == magic_test:
handle = tempfile.TemporaryFile()
while data:
handle.write(data)
data = fs.read(FILE_BUFFER_SIZE)
log.debug("decompressing finished")
fs.close()
log.debug("resetting decompressed file")
handle.seek(0, os.SEEK_SET)
bfile = wrapper_type(handle)
bfile.is_compressed = True
bfile.filepath_orig = filename
return bfile
else:
raise BlendFileError("filetype inside gzip not a blend")
else:
raise BlendFileError("filetype not a blend or a gzip blend")


@@ -91,7 +91,7 @@ def keyval_to_json(kvs, indent, indent_step, compact_output=False):
else:
return ('{%s' % indent_step[:-1] +
(',\n%s%s' % (indent, indent_step)).join(
('"%s":\n%s%s%s' % (k, indent, indent_step, v) if (v[0] in {'[', '{'}) else
('"%s":\n%s%s%s' % (k, indent, indent_step, v) if (v and v[0] in {'[', '{'}) else
'"%s": %s' % (k, v)) for k, v in kvs) +
'\n%s}' % indent)
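The added `v and` presumably guards against empty value strings, which the raw path can produce: indexing an empty string raises, while the guarded test just short-circuits to False.

v = ''
# v[0] would raise IndexError; the guarded form evaluates to '' (falsy).
print(bool(v and v[0] in {'[', '{'}))  # -> False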
@@ -114,14 +114,17 @@ def gen_fake_addresses(args, blend):
if args.use_fake_address:
hashes = set()
ret = {}
hash_seed = 1
for block in blend.blocks:
if not block.addr_old:
continue
hsh = block.get_data_hash()
hsh = block.get_data_hash(hash_seed)
while hsh in hashes:
hsh += 1
hashes.add(hsh)
ret[block.addr_old] = hsh
if args.raw_bblock:
# Raw-mode block hashes cover only the header fields, so bump the seed
# per block to avoid identical ids for blocks with identical headers.
hash_seed += 1
return ret
return {}
@@ -209,6 +212,7 @@ def do_bblock_filter(filters, blend, block, meta_keyval, data_keyval):
def bblocks_to_json(args, fw, blend, address_map, indent, indent_step):
raw_bblock = args.raw_bblock
no_address = args.no_address
full_data = args.full_data
filter_data = args.filter_data
@@ -217,16 +221,20 @@ def bblocks_to_json(args, fw, blend, address_map, indent, indent_step):
keyval = [
("code", json_dumps(block.code)),
("size", json_dumps(block.size)),
("file_offset", json_dumps(block.file_offset)),
]
if not no_address:
keyval += [("addr_old", json_dumps(address_map.get(block.addr_old, block.addr_old)))]
keyval += [
("dna_type_id", json_dumps(blend.structs[block.sdna_index].dna_type_id)),
("count", json_dumps(block.count)),
]
if raw_bblock:
keyval += [("dna_index", json_dumps(block.sdna_index))]
else:
keyval += [("dna_type_id", json_dumps(blend.structs[block.sdna_index].dna_type_id))]
keyval += [("count", json_dumps(block.count))]
return keyval
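For illustration, a raw-mode block entry built from the keyval above would look like this in the JSON output (values invented):

{"code": "DATA", "size": 256, "file_offset": 1024, "addr_old": 4302831616, "dna_index": 0, "count": 1}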
def gen_data_keyval(blend, block, key_filter=None):
if raw_bblock:
return []
def _is_pointer(k):
return blend.structs[block.sdna_index].field_from_path(blend.header, blend.handle, k).dna_name.is_pointer
if key_filter is not None:
@@ -306,8 +314,9 @@ def blend_to_json(args, f, blend, address_map):
bheader_to_json(args, fw, blend, indent, indent_step)
fw(',\n')
bblocks_to_json(args, fw, blend, address_map, indent, indent_step)
fw(',\n')
bdna_to_json(args, fw, blend, indent, indent_step)
if not args.raw_bblock:
fw(',\n')
bdna_to_json(args, fw, blend, indent, indent_step)
fw('\n}\n')
@@ -373,6 +382,12 @@ def argparse_create():
"--full-dna", dest="full_dna", default=False, action='store_true', required=False,
help=("Also put in JSon file dna properties description (ignored when --compact-output is used)"))
parser.add_argument(
"--raw-bblock", dest="raw_bblock",
default=False, action='store_true', required=False,
help=("Do not attempt to open and parse the Blendfile at a high level, but only handles its basic data layout "
"(usable when the given files are not valid blendfiles - e.g. corrupted ones)"))
group = parser.add_argument_group("Filters", FILTER_DOC)
group.add_argument(
"--filter-block", dest="block_filters", nargs=3, action='append',
@@ -406,6 +421,15 @@ def main():
args.filter_data = {n.encode() for n in args.filter_data.split(',')}
for infile, outfile in zip(args.input, args.output):
if args.raw_bblock:
with blendfile.open_blend(infile, wrapper_type=blendfile.BlendFileRaw) as blend:
address_map = gen_fake_addresses(args, blend)
if outfile:
with open(outfile, 'w', encoding="ascii", errors='xmlcharrefreplace') as f:
blend_to_json(args, f, blend, address_map)
continue
with blendfile.open_blend(infile) as blend:
address_map = gen_fake_addresses(args, blend)