Core: extract blendfile_header.py as common utility for parsing .blend files

This new file can parse the file header (first few bytes) as well as the block
headers.

Right now, this is used in two places:
* `blendfile.py` which is used by `blend2json.py`
* `blend_render_info.py`

This new module is shipped with Blender because it's needed for
`blend_render_info.py` which is shipped with Blender too. This makes using it in
`blendfile.py` (which is not shipped with Blender) a bit more annoying. However,
the situation was already not ideal, because e.g. `blend2json` already has to
extend `sys.path` to be able to import `blendfile.py`.

This new file could also be used by blender-asset-tracer (BAT).

The new `BlendFileHeader` and `BlockHeader` types may be subclassed by code
using it, because it wants to store additional derived data (`blendfile.py` and
BAT need this).

New tests have been added that check that the file and block headers are parsed
correctly for different kinds of .blend files.

Pull Request: https://projects.blender.org/blender/blender/pulls/140341
This commit is contained in:
Jacques Lucke
2025-06-23 12:53:55 +02:00
parent a5399af388
commit f0c7e52ff2
10 changed files with 468 additions and 220 deletions

View File

@@ -4,20 +4,14 @@
# SPDX-License-Identifier: GPL-2.0-or-later
# This module can get render info without running from inside blender.
#
# This struct won't change according to Ton.
# Note that the size differs on 32/64bit
#
# typedef struct BHead {
# int code, len;
# void *old;
# int SDNAnr, nr;
# } BHead;
__all__ = (
"read_blend_rend_chunk",
)
import blendfile_header
class RawBlendFileReader:
"""
@@ -64,75 +58,51 @@ class RawBlendFileReader:
return False
def get_render_info_structure(endian_str, size):
    """
    Return the ``struct.Struct`` used to decode a REND block of ``size`` bytes.

    A REND block is unpacked as two integers followed by the scene name.
    The maximum size of the scene name changed over time, so the width of the
    name field is chosen from the size of the entire block.

    :arg endian_str: Byte-order prefix of the struct format (bytes).
    :arg size: Size in bytes of the entire REND block.
    :return: The matching ``struct.Struct``.
    :raises ValueError: When ``size`` matches none of the known layouts.
    """
    import struct

    # Bytes taken by the two integers that precede the scene name.
    integers_size = 2 * 4
    for name_size in (24, 64, 256):
        if size == integers_size + name_size:
            return struct.Struct(endian_str + b'ii%ds' % name_size)

    raise ValueError("Unknown REND chunk size: {:d}".format(size))
def _read_blend_rend_chunk_from_file(blendfile, filepath):
import struct
import sys
from os import SEEK_CUR
head = blendfile.read(7)
if head != b'BLENDER':
try:
blender_header = blendfile_header.BlendFileHeader(blendfile)
except blendfile_header.BlendHeaderError:
sys.stderr.write("Not a blend file: {:s}\n".format(filepath))
return []
is_64_bit = (blendfile.read(1) == b'-')
# true for PPC, false for X86
is_big_endian = (blendfile.read(1) == b'V')
# Now read the bhead chunk!
blendfile.seek(3, SEEK_CUR) # Skip the version.
scenes = []
sizeof_bhead = 24 if is_64_bit else 20
endian_str = b'<' if blender_header.is_little_endian else b'>'
# Should always be 4, but a malformed/corrupt file may be less.
while (bhead_id := blendfile.read(4)) != b'ENDB':
if len(bhead_id) != 4:
sys.stderr.write("Unable to read until ENDB block (corrupt file): {:s}\n".format(filepath))
block_header_struct = blender_header.create_block_header_struct()
while bhead := blendfile_header.BlockHeader(blendfile, block_header_struct):
if bhead.code == b'ENDB':
break
sizeof_data_left = struct.unpack('>i' if is_big_endian else '<i', blendfile.read(4))[0]
if sizeof_data_left < 0:
# Very unlikely, but prevent other errors.
sys.stderr.write("Negative block size found (corrupt file): {:s}\n".format(filepath))
break
# 4 from the `head_id`, another 4 for the size of the BHEAD.
sizeof_bhead_left = sizeof_bhead - 8
# The remainder of the BHEAD struct is not used.
blendfile.seek(sizeof_bhead_left, SEEK_CUR)
if bhead_id == b'REND':
# Now we want the scene name, start and end frame. this is 32bits long.
start_frame, end_frame = struct.unpack('>2i' if is_big_endian else '<2i', blendfile.read(8))
sizeof_data_left -= 8
scene_name = blendfile.read(64)
sizeof_data_left -= 64
if b'\0' not in scene_name:
if sizeof_data_left >= 192:
# Assume new, up to 256 bytes name.
scene_name += blendfile.read(192)
sizeof_data_left -= 192
if b'\0' not in scene_name:
scene_name = scene_name[:-1] + b'\0'
remaining_bytes = bhead.size
if bhead.code == b'REND':
rend_block_struct = get_render_info_structure(endian_str, bhead.size)
start_frame, end_frame, scene_name = rend_block_struct.unpack(blendfile.read(rend_block_struct.size))
remaining_bytes -= rend_block_struct.size
scene_name = scene_name[:scene_name.index(b'\0')]
# It's possible old blend files are not UTF8 compliant, use `surrogateescape`.
scene_name = scene_name.decode("utf8", errors="surrogateescape")
scenes.append((start_frame, end_frame, scene_name))
if sizeof_data_left > 0:
blendfile.seek(sizeof_data_left, SEEK_CUR)
elif sizeof_data_left < 0:
# Very unlikely, but prevent attempting to further parse corrupt data.
sys.stderr.write("Error calculating next block (corrupt file): {:s}\n".format(filepath))
break
blendfile.seek(remaining_bytes, SEEK_CUR)
return scenes

View File

@@ -0,0 +1,234 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
'''
This module contains utility classes for reading headers in .blend files.
This is a pure Python implementation of the corresponding C++ code in Blender
in BLO_core_blend_header.hh and BLO_core_bhead.hh.
'''
import os
import struct
import typing
from dataclasses import dataclass
class BlendHeaderError(Exception):
    """Raised when the file header or a block header of a .blend file cannot be parsed."""
@dataclass
class BHead4:
    """
    Decoded block header of a legacy file (file format version 0) with 4 byte pointers.

    The field order matches the order in which the values are unpacked.
    """

    # Type of the block.
    code: bytes
    # Number of bytes in the block.
    len: int
    # Old pointer/identifier of the block.
    old: int
    # DNA struct index of the data in the block.
    SDNAnr: int
    # Number of DNA structures in the block.
    nr: int
@dataclass
class SmallBHead8:
    """
    Decoded block header of a legacy file (file format version 0) with 8 byte pointers.

    The field order matches the order in which the values are unpacked.
    """

    # Type of the block.
    code: bytes
    # Number of bytes in the block.
    len: int
    # Old pointer/identifier of the block.
    old: int
    # DNA struct index of the data in the block.
    SDNAnr: int
    # Number of DNA structures in the block.
    nr: int
@dataclass
class LargeBHead8:
    """
    Decoded block header of a file using file format version 1.

    The field order matches the order in which the values are unpacked,
    which differs from the legacy headers: ``SDNAnr`` comes directly after
    ``code`` and ``len`` comes after ``old``.
    """

    # Type of the block.
    code: bytes
    # DNA struct index of the data in the block.
    SDNAnr: int
    # Old pointer/identifier of the block.
    old: int
    # Number of bytes in the block.
    len: int
    # Number of DNA structures in the block.
    nr: int
@dataclass
class BlockHeaderStruct:
    """
    Describes how block headers are encoded in one particular .blend file.

    It pairs the binary layout of the header with the Python type
    that receives the decoded values.
    """

    # Binary format of the encoded header.
    struct: struct.Struct
    # Corresponding Python type for retrieving block header values.
    type: typing.Type[typing.Union[BHead4, SmallBHead8, LargeBHead8]]

    @property
    def size(self) -> int:
        """Number of bytes taken by one encoded block header."""
        return self.struct.size

    def parse(self, data: bytes) -> typing.Union[BHead4, SmallBHead8, LargeBHead8]:
        """
        Decode ``data`` (exactly ``size`` bytes) into an instance of ``type``.

        The unpacked values are passed positionally, so the field order of
        ``type`` has to match the order of the values in ``struct``.
        """
        header_type = self.type
        values = self.struct.unpack(data)
        return header_type(*values)
class BlendFileHeader:
    """
    BlendFileHeader represents the first 12-17 bytes of a blend file.

    It contains information about the hardware architecture, which is relevant
    to the structure of the rest of the file.

    Two layouts are understood:

    - Legacy, 12 bytes: ``BLENDER``, pointer size (``_`` is 4, ``-`` is 8),
      endianness (``v`` is little, ``V`` is big), 3 digit Blender version.
    - File format version 1, 17 bytes: ``BLENDER``, 2 digit header size (``17``),
      ``-``, 2 digit file format version (``01``), ``v``, 4 digit Blender version.
    """

    # Always 'BLENDER'.
    magic: bytes
    # Currently always 0 or 1.
    file_format_version: int
    # Either 4 or 8.
    pointer_size: int
    # Endianness of values stored in the file.
    is_little_endian: bool
    # Blender version the file has been written with.
    # The last two digits are the minor version. So 280 is 2.80.
    version: int

    def __init__(self, file: typing.IO[bytes]) -> None:
        """
        Parse the file header, starting at the beginning of ``file``.

        The file is rewound first and is left positioned directly behind the header.

        :arg file: Binary file object to read the header from.
        :raises BlendHeaderError: When the header is not a supported .blend file header,
           including truncated headers and non-numeric number fields.
        """
        file.seek(0, os.SEEK_SET)

        bytes_0_6 = file.read(7)
        if bytes_0_6 != b'BLENDER':
            raise BlendHeaderError("invalid first bytes %r" % bytes_0_6)
        self.magic = bytes_0_6

        byte_7 = file.read(1)
        if byte_7 in (b'_', b'-'):
            # Legacy header: byte 7 directly encodes the pointer size.
            self.file_format_version = 0
            self.pointer_size = 4 if byte_7 == b'_' else 8

            byte_8 = file.read(1)
            if byte_8 == b'v':
                self.is_little_endian = True
            elif byte_8 == b'V':
                self.is_little_endian = False
            else:
                raise BlendHeaderError("invalid endian indicator %r" % byte_8)

            bytes_9_11 = file.read(3)
            self.version = self._parse_decimal(bytes_9_11, "version")
        else:
            # Newer header: bytes 7-8 hold the size of the header itself.
            byte_8 = file.read(1)
            header_size = self._parse_decimal(byte_7 + byte_8, "file header size")
            if header_size != 17:
                raise BlendHeaderError("unknown file header size %d" % header_size)

            byte_9 = file.read(1)
            if byte_9 != b'-':
                raise BlendHeaderError("invalid file header")
            self.pointer_size = 8

            byte_10_11 = file.read(2)
            self.file_format_version = self._parse_decimal(byte_10_11, "file format version")
            if self.file_format_version != 1:
                raise BlendHeaderError("unsupported file format version %r" % self.file_format_version)

            byte_12 = file.read(1)
            if byte_12 != b'v':
                raise BlendHeaderError("invalid file header")
            self.is_little_endian = True

            byte_13_16 = file.read(4)
            self.version = self._parse_decimal(byte_13_16, "version")

    @staticmethod
    def _parse_decimal(digits: bytes, what: str) -> int:
        """
        Convert the ASCII number ``digits`` to an int.

        A failed conversion (e.g. empty bytes from a truncated file) is reported as
        ``BlendHeaderError``, so callers only have to handle a single exception type
        for files that are not valid .blend files.
        """
        try:
            return int(digits)
        except ValueError as ex:
            raise BlendHeaderError("invalid %s %r" % (what, digits)) from ex

    def create_block_header_struct(self) -> BlockHeaderStruct:
        """
        Create the description of the block header layout used by this file.

        The layout depends on the file format version, the pointer size and
        the endianness that have been read from the file header.
        """
        assert self.file_format_version in (0, 1)
        endian_str = b'<' if self.is_little_endian else b'>'

        if self.file_format_version == 1:
            header_struct = struct.Struct(b''.join((
                endian_str,
                # LargeBHead8.code
                b'4s',
                # LargeBHead8.SDNAnr
                b'i',
                # LargeBHead8.old
                b'Q',
                # LargeBHead8.len
                b'q',
                # LargeBHead8.nr
                b'q',
            )))
            return BlockHeaderStruct(header_struct, LargeBHead8)

        if self.pointer_size == 4:
            header_struct = struct.Struct(b''.join((
                endian_str,
                # BHead4.code
                b'4s',
                # BHead4.len
                b'i',
                # BHead4.old
                b'I',
                # BHead4.SDNAnr
                b'i',
                # BHead4.nr
                b'i',
            )))
            return BlockHeaderStruct(header_struct, BHead4)

        assert self.pointer_size == 8
        header_struct = struct.Struct(b''.join((
            endian_str,
            # SmallBHead8.code
            b'4s',
            # SmallBHead8.len
            b'i',
            # SmallBHead8.old
            b'Q',
            # SmallBHead8.SDNAnr
            b'i',
            # SmallBHead8.nr
            b'i',
        )))
        return BlockHeaderStruct(header_struct, SmallBHead8)
class BlockHeader:
    """
    Header of a single block in a .blend file.

    A .blend file consists of a sequence of blocks whereby each block has a header.
    The binary representation of that header differs between files, this class
    exposes the decoded values through one unified set of attributes.
    """

    __slots__ = (
        "code",
        "size",
        "addr_old",
        "sdna_index",
        "count",
    )

    # Indicates the type of the block. See BLO_CODE_* in BLO_core_bhead.hh.
    code: bytes
    # Number of bytes in the block.
    size: int
    # Old pointer/identifier of the block.
    addr_old: int
    # DNA struct index of the data in the block.
    sdna_index: int
    # Number of DNA structures in the block.
    count: int

    def __init__(self, file: typing.IO[bytes], block_header_struct: BlockHeaderStruct) -> None:
        """
        Read and decode one block header from the current position of ``file``.

        :arg file: Binary file object positioned at the start of a block header.
        :arg block_header_struct: Layout of the block headers in this file.
        :raises BlendHeaderError: When fewer bytes than a full header are available
           and they are not an 8 byte ``ENDB`` header.
        """
        expected_size = block_header_struct.size
        data = file.read(expected_size)

        if len(data) == expected_size:
            header = block_header_struct.parse(data)
            # The code is padded with null bytes, only keep what comes before them.
            self.code = header.code.partition(b'\0')[0]
            self.size = header.len
            self.addr_old = header.old
            self.sdna_index = header.SDNAnr
            self.count = header.nr
            return

        # Short read: only accepted when it is an 8 byte header with the ENDB code.
        if len(data) != 8:
            raise BlendHeaderError("invalid block header size")
        block_code, _ = struct.unpack(b'4sI', data)
        if block_code != b'ENDB':
            raise BlendHeaderError("invalid block header")

        self.code = b'ENDB'
        self.size = 0
        self.addr_old = 0
        self.sdna_index = 0
        self.count = 0