Core: extract blendfile_header.py as common utility for parsing .blend files

This new file can parse the file header (first few bytes) as well as the block headers. Right now, this is used by two places: * `blendfile.py` which is used by `blend2json.py` * `blend_render_info.py` This new module is shipped with Blender because it's needed for `blend_render_info.py` which is shipped with Blender too. This makes using it in `blendfile.py` (which is not shipped with Blender) a bit more annoying. However, this is already not ideal, because e.g. `blend2json` also has to add to `sys.path` already to be able to import `blendfile.py`. This new file could also be used by blender-asset-tracer (BAT). The new `BlendFileHeader` and `BlockHeader` types may be subclassed by code using it, because it wants to store additional derived data (`blendfile.py` and BAT need this). New tests have been added that check that the file and block header is parsed correctly for different kinds of .blend files. Pull Request: https://projects.blender.org/blender/blender/pulls/140341
2025-06-23 12:53:55 +02:00
parent a5399af388
commit f0c7e52ff2
10 changed files with 468 additions and 220 deletions
--- a/tools/modules/blendfile.py
+++ b/tools/modules/blendfile.py
@@ -18,11 +18,13 @@ __all__ = (
    "BlendFileRaw",
 )

+import os
+import sys
+sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "scripts", "modules"))

-from collections import namedtuple
+import blendfile_header
 import gzip
 import logging
-import os
 import struct
 import tempfile
 import zstandard as zstd
@@ -55,10 +57,8 @@ class BlendFile:
        "filepath_orig",
        # BlendFileHeader
        "header",
-        # struct.Struct
+        # blendfile_header.BlockHeaderStruct
        "block_header_struct",
-        # namedtuple
-        "block_header_fields",
        # BlendFileBlock
        "blocks",
        # [DNAStruct, ...]
@@ -80,7 +80,7 @@ class BlendFile:
        log.debug("initializing reading blend-file")
        self.handle = handle
        self.header = BlendFileHeader(handle)
-        self.block_header_struct, self.block_header_fields = self.header.create_block_header_struct()
+        self.block_header_struct = self.header.create_block_header_struct()
        self.blocks = []
        self.code_index = {}
        self.structs = []
@@ -245,18 +245,13 @@ class BlendFile:
        return structs, sdna_index_from_id


-class BlendFileBlock:
+class BlendFileBlock(blendfile_header.BlockHeader):
    """
    Instance of a struct.
    """
    __slots__ = (
        # BlendFile
        "file",
-        "code",
-        "size",
-        "addr_old",
-        "sdna_index",
-        "count",
        "file_offset",
        "user_data",
    )
@@ -273,46 +268,10 @@ class BlendFileBlock:
                 ))

    def __init__(self, handle, bfile):
+        super().__init__(handle, bfile.block_header_struct)
        self.file = bfile
        self.user_data = None
-
-        data = handle.read(bfile.block_header_struct.size)
-
-        if len(data) != bfile.block_header_struct.size:
-            print("WARNING! Blend file seems to be badly truncated!")
-            self.code = b'ENDB'
-            self.size = 0
-            self.addr_old = 0
-            self.sdna_index = 0
-            self.count = 0
-            self.file_offset = 0
-            return
-        # Header can be just 8 byte because of ENDB block in old .blend files.
-        if len(data) > 8:
-            blockheader = bfile.block_header_fields(*bfile.block_header_struct.unpack(data))
-            self.code = blockheader[0].partition(b'\0')[0]
-            if self.code != b'ENDB':
-                self.size = blockheader.len
-                self.addr_old = blockheader.old
-                self.sdna_index = blockheader.SDNAnr
-                self.count = blockheader.nr
-                self.file_offset = handle.tell()
-            else:
-                self.size = 0
-                self.addr_old = 0
-                self.sdna_index = 0
-                self.count = 0
-                self.file_offset = 0
-        else:
-            OLDBLOCK = struct.Struct(b'4sI')
-            blockheader = OLDBLOCK.unpack(data)
-            self.code = blockheader[0].partition(b'\0')[0]
-            self.code = DNA_IO.read_data0(blockheader[0])
-            self.size = 0
-            self.addr_old = 0
-            self.sdna_index = 0
-            self.count = 0
-            self.file_offset = 0
+        self.file_offset = handle.tell()

    @property
    def dna_type(self):
@@ -744,74 +703,12 @@ class BlendFileBlockRaw:
 # Read Magic


-class BlendFileHeader:
-    """
-    BlendFileHeader allocates the first 12-17 bytes (depending on the file version) of a blend file
-    it contains information about the hardware architecture
-    """
-    __slots__ = (
-        # str
-        "magic",
-        # int
-        "file_format_version",
-        # int 4/8
-        "pointer_size",
-        # bool
-        "is_little_endian",
-        # int
-        "version",
-        # str, used to pass to 'struct'
-        "endian_str",
-        # int, used to index common types
-        "endian_index",
-    )
+class BlendFileHeader(blendfile_header.BlendFileHeader):
+    endian_index: int
+    endian_str: bytes

    def __init__(self, handle):
-        log.debug("reading blend-file-header")
-
-        bytes_0_6 = handle.read(7)
-        if bytes_0_6 != b'BLENDER':
-            raise BlendFileError("invalid first bytes")
-        self.magic = bytes_0_6
-
-        byte_7 = handle.read(1)
-        is_legacy_header = byte_7 in (b'_', b'-')
-        if is_legacy_header:
-            self.file_format_version = 0
-            if byte_7 == b'_':
-                self.pointer_size = 4
-            elif byte_7 == b'-':
-                self.pointer_size = 8
-            else:
-                raise BlendFileError("invalid file header")
-            byte_8 = handle.read(1)
-            if byte_8 == b'v':
-                self.is_little_endian = True
-            elif byte_8 == b'V':
-                self.is_little_endian = False
-            else:
-                raise BlendFileError("invalid file header")
-            bytes_9_11 = handle.read(3)
-            self.version = int(bytes_9_11)
-        else:
-            byte_8 = handle.read(1)
-            header_size = int(byte_7 + byte_8)
-            if header_size != 17:
-                raise BlendFileError("invalid file header")
-            byte_9 = handle.read(1)
-            if byte_9 != b'-':
-                raise BlendFileError("invalid file header")
-            self.pointer_size = 8
-            byte_10_11 = handle.read(2)
-            self.file_format_version = int(byte_10_11)
-            if self.file_format_version != 1:
-                raise BlendFileError("unsupported file format version")
-            byte_12 = handle.read(1)
-            if byte_12 != b'v':
-                raise BlendFileError("invalid file header")
-            self.is_little_endian = True
-            byte_13_16 = handle.read(4)
-            self.version = int(byte_13_16)
+        super().__init__(handle)

        if self.is_little_endian:
            self.endian_str = b'<'
@@ -820,51 +717,6 @@ class BlendFileHeader:
            self.endian_index = 1
            self.endian_str = b'>'

-    def create_block_header_struct(self):
-        if self.file_format_version == 0:
-            if self.pointer_size == 4:
-                return struct.Struct(b''.join((
-                    self.endian_str,
-                    # BHead4.code
-                    b'4s',
-                    # BHead4.len
-                    b'i',
-                    # BHead4.old
-                    b'I',
-                    # BHead4.SDNAnr
-                    b'i',
-                    # BHead4.nr
-                    b'i',
-                ))), namedtuple('BHead4', ('code', 'len', 'old', 'SDNAnr', 'nr'))
-            else:
-                return struct.Struct(b''.join((
-                    self.endian_str,
-                    # SmallBHead8.code
-                    b'4s',
-                    # SmallBHead8.len
-                    b'i',
-                    # SmallBHead8.old
-                    b'Q',
-                    # SmallBHead8.SDNAnr
-                    b'i',
-                    # SmallBHead8.nr
-                    b'i',
-                ))), namedtuple('SmallBHead8', ('code', 'len', 'old', 'SDNAnr', 'nr'))
-        else:
-            return struct.Struct(b''.join((
-                self.endian_str,
-                # LargeBHead8.code
-                b'4s',
-                # LargeBHead8.SDNAnr
-                b'i',
-                # LargeBHead8.old
-                b'Q',
-                # LargeBHead8.len
-                b'q',
-                # LargeBHead8.nr
-                b'q',
-            ))), namedtuple('LargeBHead8', ('code', 'SDNAnr', 'old', 'len', 'nr'))
-

 class DNAName:
    """