Tools: add a utility to validate array sizes

The script check_source/static_check_size_comments.py can be run directly
or called via the convenience target "make check_size_comments".

Add a utility module: `line_number_utils` which implements
a version of `re.finditer` that includes line numbers & ranges.
Campbell Barton
2025-05-23 04:01:10 +00:00
parent 84694bf635
commit 0265b13399
4 changed files with 374 additions and 0 deletions
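To make the convention concrete, a minimal self-contained sketch (not part of the commit): the pattern below mirrors REGEX_SIZE_COMMENT_IN_ARRAY from the new script, and `MAX_NAME` is a hypothetical define equal to 64.

import re

# Mirrors REGEX_SIZE_COMMENT_IN_ARRAY from the script added below.
SIZE_COMMENT = re.compile(r"\[\/\*([^\]]+)\*\/\s*(\d+)\]")

# `MAX_NAME` is hypothetical; the checker compares the literal 64 against the define's value.
m = SIZE_COMMENT.search("char name[/*MAX_NAME*/ 64];")
assert m is not None
print(m.group(1), m.group(2))  # -> MAX_NAME 64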


@@ -64,6 +64,7 @@ Static Source Code Checking
* check_clang_array: Run blender source through clang array checking script (C & C++).
* check_struct_comments: Check struct member comments are correct (C & C++).
* check_size_comments: Check array size comments match defines/enums (C & C++).
* check_deprecated: Check if there is any deprecated code to remove.
* check_descriptions: Check for duplicate/invalid descriptions.
* check_licenses: Check license headers follow the SPDX license specification,
@@ -502,6 +503,10 @@ check_struct_comments: .FORCE
"$(BLENDER_DIR)/tools/check_source/static_check_clang.py" \ "$(BLENDER_DIR)/tools/check_source/static_check_clang.py" \
--checks=struct_comments --match=".*" --jobs=$(NPROCS) --checks=struct_comments --match=".*" --jobs=$(NPROCS)
check_size_comments: .FORCE
$(PYTHON) \
"$(BLENDER_DIR)/tools/check_source/static_check_size_comments.py"
check_clang_array: .FORCE check_clang_array: .FORCE
@$(CMAKE_CONFIG) @$(CMAKE_CONFIG)
@cd "$(BUILD_DIR)" ; \ @cd "$(BUILD_DIR)" ; \


@@ -33,6 +33,7 @@ PATHS: tuple[tuple[str, tuple[Any, ...], dict[str, str]], ...] = (
("tools/check_docs/", (), {}), ("tools/check_docs/", (), {}),
("tools/check_source/", (), {'MYPYPATH': "modules"}), ("tools/check_source/", (), {'MYPYPATH': "modules"}),
("tools/check_source/check_unused_defines.py", (), {'MYPYPATH': "../utils_maintenance/modules"}), ("tools/check_source/check_unused_defines.py", (), {'MYPYPATH': "../utils_maintenance/modules"}),
("tools/check_source/static_check_size_comments.py", (), {'MYPYPATH': "../utils_maintenance/modules"}),
("tools/config/", (), {}), ("tools/config/", (), {}),
("tools/triage/", (), {}), ("tools/triage/", (), {}),
("tools/utils/", (), {}), ("tools/utils/", (), {}),


@@ -0,0 +1,280 @@
#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2023 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
r"""
Validates sizes in C/C++ sources written as: ``type name[/*MAX_NAME*/ 64]``
where ``MAX_NAME`` is expected to be a define equal to 64, otherwise a warning is reported.
"""
__all__ = (
"main",
)
import os
import sys
import re
THIS_DIR = os.path.dirname(__file__)
BASE_DIR = os.path.normpath(os.path.abspath(os.path.normpath(os.path.join(THIS_DIR, "..", ".."))))
sys.path.append(os.path.join(THIS_DIR, "..", "utils_maintenance", "modules"))
from batch_edit_text import run
import line_number_utils
# -----------------------------------------------------------------------------
# Utilities
# -----------------------------------------------------------------------------
# Local Settings
# TODO, move to config file
SOURCE_DIRS = (
"source",
)
SOURCE_EXT = (
# C/C++
".c", ".h", ".cpp", ".hpp", ".cc", ".hh", ".cxx", ".hxx", ".inl",
# Objective C
".m", ".mm",
# GLSL
".glsl",
)
# Mainly useful for development to check extraction & validation are working.
SHOW_SUCCESS = True
# -----------------------------------------------------------------------------
# Globals
# Map defines to a list of (filename-split, value) pairs.
global_defines: dict[
# The define ID.
str,
# Value(s), in case it's defined in multiple files.
list[
tuple[
# The `BASE_DIR` relative path (split by `os.sep`).
tuple[str, ...],
# The value of the define,
# a literal string with comments stripped out.
str,
],
],
] = {}
REGEX_ID_LITERAL = "[A-Za-z_][A-Za-z_0-9]*"
# Detect:
# `[/*ID*/ 64]`.
# `[/*ID - 2*/ 62]`.
REGEX_SIZE_COMMENT_IN_ARRAY = re.compile("\\[\\/\\*([^\\]]+)\\*\\/\\s*(\\d+)\\]")
# Detect: `#define ID 64`
REGEX_DEFINE_C_LIKE = re.compile("^\\s*#\\s*define\\s+(" + REGEX_ID_LITERAL + ")[ \t]+([^\n]+)", re.MULTILINE)
# Detect:
# `ID = 64,`
# `ID = 64`
REGEX_ENUM_C_LIKE = re.compile("^\\s*(" + REGEX_ID_LITERAL + ")\\s=\\s([^,\n]+)", re.MULTILINE)
# Detect ID's.
REGEX_ID_OR_NUMBER_C_LIKE = re.compile("[A-Za-z0-9_]+")
def extract_defines(filepath: str, data_src: str) -> None:
filepath_rel = os.path.relpath(filepath, BASE_DIR)
for regex_matcher in (REGEX_DEFINE_C_LIKE, REGEX_ENUM_C_LIKE):
for m in regex_matcher.finditer(data_src):
value_id = m.group(1)
value_literal = m.group(2)
# Weak comment stripping.
# This is (arguably) acceptable since the intent is to extract numbers,
# if developers feel the need to write lines such as:
# `#define VALUE_MAX /* Lets make some trouble! */ 64`
# Then they can consider if that's actually needed (sigh!)...
# Otherwise, we could replace this with a full parser such as CLANG,
# however this is a bit of a hassle to setup.
if "//" in value_literal:
value_literal = value_literal.split("//", 1)[0]
if "/*" in value_literal:
value_literal = value_literal.split("/*", 1)[0]
try:
global_defines[value_id].append((tuple(filepath_rel.split(os.sep)), value_literal))
except KeyError:
global_defines[value_id] = [(tuple(filepath_rel.split(os.sep)), value_literal)]
# Returning None indicates the file is not edited.
def path_score_distance(a: tuple[str, ...], b: tuple[str, ...]) -> tuple[int, int]:
"""
Compare two paths, to find which paths are "closer" to each-other.
This is used as a tie breaker when defines are found in multiple headers.
"""
count_shared = 0
range_min = min(len(a), len(b))
range_max = max(len(a), len(b))
for i in range(range_min):
if a[i] != b[i]:
break
count_shared += 1
count_nested = range_max - count_shared
# Negate shared so smaller is better.
# Less path nesting also gets priority.
return (-count_shared, count_nested)
def eval_define(
value_literal: str,
*,
default: str,
filepath_ref_split: tuple[str, ...],
) -> tuple[str, list[str]]:
failed: list[str] = []
def re_replace_fn(match: re.Match[str]) -> str:
value = match.group()
if value.isdigit():
return value
other_values = global_defines.get(value)
if other_values is None:
failed.append(value)
return value
if len(other_values) == 1:
other_filepath_split, other_literal = other_values[0]
else:
# Find the "closest" on the file system.
# In practice favor paths which are co-located works fairly well,
# needed as it's now known which headers ID's in a head *could* reference.
other_literal_best = ""
other_score_best = (0, 0)
other_filepath_split_best: tuple[str, ...] = ("",)
for other_filepath_split_test, other_literal_test in other_values:
other_score_test = path_score_distance(filepath_ref_split, other_filepath_split_test)
if (
# First time.
(not other_literal_best) or
# A lower score has been found (smaller is better).
(other_score_test < other_score_best)
):
other_literal_best = other_literal_test
other_score_best = other_score_test
other_filepath_split_best = other_filepath_split_test
del other_score_test
other_literal = other_literal_best
other_filepath_split = other_filepath_split_best
del other_literal_best, other_score_best, other_filepath_split_best
other_literal_eval, other_failed = eval_define(
other_literal,
default="",
filepath_ref_split=other_filepath_split,
)
if other_literal_eval:
return other_literal_eval
# `failed.append(value)` is also valid, report the gestured failure as its more likely to give insights
# into what went wrong.
failed.extend(other_failed)
return value
# Use integer division.
value_literal = value_literal.replace(r"/", r"//")
# Populates `failed`.
value_literal_eval = REGEX_ID_OR_NUMBER_C_LIKE.sub(re_replace_fn, value_literal)
if failed:
# One or more ID could not be found.
return default, failed
# This could use exception handling, don't unless it's needed though.
# pylint: disable-next=eval-used
return str(eval(value_literal_eval)), failed
def validate_sizes(filepath: str, data_src: str) -> None:
# Nicer for printing.
filepath_rel = os.path.relpath(filepath, BASE_DIR)
filepath_rel_split = tuple(filepath_rel.split(os.sep))
for m, line, (beg, end) in line_number_utils.finditer_with_line_numbers_and_bounds(
REGEX_SIZE_COMMENT_IN_ARRAY,
data_src,
):
del end
value_id = m.group(1)
value_literal = m.group(2)
value_eval, lookups_failed = eval_define(
value_id,
default="",
filepath_ref_split=filepath_rel_split,
)
data_line_column = "{:s}:{:d}:{:d}:".format(
filepath_rel,
line + 1,
# Place the cursor after the `[`.
(m.start(0) + 1) - beg,
)
if len(value_id.strip()) != len(value_id):
print("WARN:", data_line_column, "comment includes white-space")
continue
if lookups_failed:
print("WARN:", data_line_column, "[{:s}]".format(", ".join(lookups_failed)), "unknown")
continue
if value_literal != value_eval:
print("WARN:", data_line_column, value_id, "mismatch", "({:s} != {:s})".format(value_literal, value_eval))
continue
if SHOW_SUCCESS:
print("OK: ", data_line_column, "{:s}={:s},".format(value_id, value_literal))
# Returning None indicates the file is not edited.
def main() -> int:
# Extract defines.
run(
directories=[os.path.join(BASE_DIR, d) for d in SOURCE_DIRS],
is_text=lambda filepath: filepath.endswith(SOURCE_EXT),
text_operation=extract_defines,
# Can't be used if we want to accumulate in a global variable.
use_multiprocess=False,
)
# For predictable lookups on tie breakers.
# In practice it should almost never matter.
for values in global_defines.values():
if len(values) > 1:
values.sort()
# Validate sizes.
run(
directories=[os.path.join(BASE_DIR, d) for d in SOURCE_DIRS],
is_text=lambda filepath: filepath.endswith(SOURCE_EXT),
text_operation=validate_sizes,
# Can't be used if we want to accumulate in a global variable.
use_multiprocess=False,
)
return 0
if __name__ == "__main__":
sys.exit(main())
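The tie-breaking in path_score_distance is the subtlest part of the script, so here is a small usage sketch (the paths are hypothetical, pre-split by os.sep as the script does):

a = ("source", "blender", "makesdna", "DNA_scene_types.h")
b = ("source", "blender", "makesdna", "DNA_ID.h")
c = ("source", "blender", "editors", "include", "ED_view3d.hh")

print(path_score_distance(a, b))  # -> (-3, 1): 3 shared components, 1 unshared.
print(path_score_distance(a, c))  # -> (-2, 3): fewer shared, more unshared.
# Sorted ascending, (-3, 1) < (-2, 3), so `b` is the closer candidate to `a`.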


@@ -0,0 +1,88 @@
# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
"""
When writing text checking utilities, it's not always straightforward
to find line numbers and ranges from an offset within the text.
This module provides helpers to efficiently do this.
The main utility is ``finditer_with_line_numbers_and_bounds``,
an alternative to ``re.finditer`` which yields line numbers and offsets
for the line bounds - useful for scanning files and reporting errors that include the line contents.
"""
__all__ = (
"finditer_newline_cache_compute",
"finditer_with_line_numbers_and_bounds",
"line_to_offset_range",
)
from collections.abc import (
Iterator,
)
import re as _re
def finditer_newline_cache_compute(text: str) -> tuple[dict[int, int], list[int]]:
"""
Return a tuple containing:
Offset to
"""
# Offset to line lookup.
offset_to_line_cache: dict[int, int] = {}
# Line to offset lookup.
line_to_offset_cache: list[int] = [0]
for i, m in enumerate(_re.finditer("\\n", text), 1):
ofs = m.start()
offset_to_line_cache[ofs] = i
line_to_offset_cache.append(ofs)
return offset_to_line_cache, line_to_offset_cache
def finditer_with_line_numbers_and_bounds(
pattern: str,
text: str,
*,
offset_to_line_cache: dict[int, int] | None = None,
flags: int = 0,
) -> Iterator[tuple[_re.Match[str], int, tuple[int, int]]]:
"""
A version of ``re.finditer`` that returns ``(match, line_number, line_bounds)``.
Note that ``offset_to_line_cache`` is the first return value from
``finditer_newline_cache_compute``.
This should be passed in if the iterator is called multiple times
on the same buffer, to avoid calculating this every time.
"""
if offset_to_line_cache is None:
offset_to_line_cache, line_to_offset_cache = finditer_newline_cache_compute(text)
del line_to_offset_cache
text_len = len(text)
for m in _re.finditer(pattern, text, flags):
if (beg := text.rfind("\n", 0, m.start())) == -1:
beg = 0
line_number = 0
else:
line_number = offset_to_line_cache[beg]
if (end := text.find("\n", m.end(), text_len)) == -1:
end = text_len
yield m, line_number, (beg, end)
def line_to_offset_range(line: int, offset_limit: int, line_to_offset_cache: list[int]) -> tuple[int, int]:
"""
Given an offset, return line bounds.
"""
assert line >= 0
beg = line_to_offset_cache[line]
end = line_to_offset_cache[line + 1] if (line + 1 < len(line_to_offset_cache)) else offset_limit
return beg, end
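A short usage sketch for the iterator and the line lookup (the sample text is illustrative):

import line_number_utils

text = "alpha\nbeta gamma\nbeta delta\n"
for m, line, (beg, end) in line_number_utils.finditer_with_line_numbers_and_bounds(r"beta", text):
    # `line` is 0-based; `beg` is the offset of the newline preceding the
    # matched line (or 0 on the first line), `end` the newline that ends it.
    print("{:d}: {!r}".format(line + 1, text[beg:end].lstrip("\n")))
# -> 2: 'beta gamma'
# -> 3: 'beta delta'

_, line_to_offset = line_number_utils.finditer_newline_cache_compute(text)
print(line_number_utils.line_to_offset_range(1, len(text), line_to_offset))  # -> (5, 16)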