diff --git a/GNUmakefile b/GNUmakefile
index 9880411e802..7b75e83b6ab 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -64,6 +64,7 @@ Static Source Code Checking
 	* check_clang_array: Run blender source through clang array checking script (C & C++).
 	* check_struct_comments: Check struct member comments are correct (C & C++).
+	* check_size_comments: Check array size comments match defines/enums (C & C++).
 	* check_deprecated: Check if there is any deprecated code to remove.
 	* check_descriptions: Check for duplicate/invalid descriptions.
 	* check_licenses: Check license headers follow the SPDX license specification,
@@ -502,6 +503,10 @@ check_struct_comments: .FORCE
 		"$(BLENDER_DIR)/tools/check_source/static_check_clang.py" \
 		--checks=struct_comments --match=".*" --jobs=$(NPROCS)
 
+check_size_comments: .FORCE
+	$(PYTHON) \
+		"$(BLENDER_DIR)/tools/check_source/static_check_size_comments.py"
+
 check_clang_array: .FORCE
 	@$(CMAKE_CONFIG)
 	@cd "$(BUILD_DIR)" ; \
diff --git a/tools/check_source/check_mypy_config.py b/tools/check_source/check_mypy_config.py
index 9642bcab6b8..cb9402d73c8 100644
--- a/tools/check_source/check_mypy_config.py
+++ b/tools/check_source/check_mypy_config.py
@@ -33,6 +33,7 @@ PATHS: tuple[tuple[str, tuple[Any, ...], dict[str, str]], ...] = (
     ("tools/check_docs/", (), {}),
     ("tools/check_source/", (), {'MYPYPATH': "modules"}),
     ("tools/check_source/check_unused_defines.py", (), {'MYPYPATH': "../utils_maintenance/modules"}),
+    ("tools/check_source/static_check_size_comments.py", (), {'MYPYPATH': "../utils_maintenance/modules"}),
     ("tools/config/", (), {}),
     ("tools/triage/", (), {}),
     ("tools/utils/", (), {}),
diff --git a/tools/check_source/static_check_size_comments.py b/tools/check_source/static_check_size_comments.py
new file mode 100755
index 00000000000..fc26dba223f
--- /dev/null
+++ b/tools/check_source/static_check_size_comments.py
@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: 2023 Blender Authors
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+r"""
+Validates sizes in C/C++ sources written as: ``type name[/*MAX_NAME*/ 64]``
+where ``MAX_NAME`` is expected to be a define equal to 64, otherwise a warning is reported.
+"""
+__all__ = (
+    "main",
+)
+
+import os
+import sys
+import re
+
+THIS_DIR = os.path.dirname(__file__)
+BASE_DIR = os.path.normpath(os.path.abspath(os.path.join(THIS_DIR, "..", "..")))
+sys.path.append(os.path.join(THIS_DIR, "..", "utils_maintenance", "modules"))
+
+from batch_edit_text import run
+import line_number_utils
+
+
+# -----------------------------------------------------------------------------
+# Utilities
+
+
+# -----------------------------------------------------------------------------
+# Local Settings
+
+# TODO: move to a config file.
+SOURCE_DIRS = (
+    "source",
+)
+
+SOURCE_EXT = (
+    # C/C++
+    ".c", ".h", ".cpp", ".hpp", ".cc", ".hh", ".cxx", ".hxx", ".inl",
+    # Objective C
+    ".m", ".mm",
+    # GLSL
+    ".glsl",
+)
+
+# Mainly useful for development, to check extraction & validation are working.
+SHOW_SUCCESS = True
+
+
+# -----------------------------------------------------------------------------
+# Globals
+
+
+# Map defines to a list of (filename-split, value) pairs.
+global_defines: dict[
+    # The define ID.
+    str,
+    # Value(s), in case it's defined in multiple files.
+    list[
+        tuple[
+            # The `BASE_DIR` relative path (split by `os.sep`).
+            tuple[str, ...],
+            # The value of the define,
+            # a literal string with comments stripped out.
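+            # For example, ``#define MAX_NAME 64`` found in a hypothetical
+            # ``source/foo/bar.h`` would be stored here as:
+            #     {"MAX_NAME": [(("source", "foo", "bar.h"), "64")]}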
+            str,
+        ],
+    ],
+] = {}
+
+
+REGEX_ID_LITERAL = "[A-Za-z_][A-Za-z_0-9]*"
+
+# Detect:
+# `[/*ID*/ 64]`.
+# `[/*ID - 2*/ 62]`.
+REGEX_SIZE_COMMENT_IN_ARRAY = re.compile("\\[\\/\\*([^\\]]+)\\*\\/\\s*(\\d+)\\]")
+# Detect: `#define ID 64`
+REGEX_DEFINE_C_LIKE = re.compile("^\\s*#\\s*define\\s+(" + REGEX_ID_LITERAL + ")[ \t]+([^\n]+)", re.MULTILINE)
+# Detect:
+# `ID = 64,`
+# `ID = 64`
+REGEX_ENUM_C_LIKE = re.compile("^\\s*(" + REGEX_ID_LITERAL + ")\\s=\\s([^,\n]+)", re.MULTILINE)
+# Detect ID's or numbers.
+REGEX_ID_OR_NUMBER_C_LIKE = re.compile("[A-Za-z0-9_]+")
+
+
+def extract_defines(filepath: str, data_src: str) -> None:
+    filepath_rel = os.path.relpath(filepath, BASE_DIR)
+    for regex_matcher in (REGEX_DEFINE_C_LIKE, REGEX_ENUM_C_LIKE):
+        for m in regex_matcher.finditer(data_src):
+            value_id = m.group(1)
+            value_literal = m.group(2)
+
+            # Weak comment stripping.
+            # This is (arguably) acceptable since the intent is to extract numbers.
+            # If developers feel the need to write lines such as:
+            # `#define VALUE_MAX /* Let's make some trouble! */ 64`
+            # then they can consider if that's actually needed (sigh!)...
+            # Otherwise, we could replace this with a full parser such as CLANG,
+            # however that is a bit of a hassle to set up.
+            if "//" in value_literal:
+                value_literal = value_literal.split("//", 1)[0]
+            if "/*" in value_literal:
+                value_literal = value_literal.split("/*", 1)[0]
+
+            try:
+                global_defines[value_id].append((tuple(filepath_rel.split(os.sep)), value_literal))
+            except KeyError:
+                global_defines[value_id] = [(tuple(filepath_rel.split(os.sep)), value_literal)]
+
+    # Returning None indicates the file is not edited.
+
+
+def path_score_distance(a: tuple[str, ...], b: tuple[str, ...]) -> tuple[int, int]:
+    """
+    Compare two paths, to find which paths are "closer" to each other.
+    This is used as a tie breaker when defines are found in multiple headers.
+    """
+    count_shared = 0
+    range_min = min(len(a), len(b))
+    range_max = max(len(a), len(b))
+    for i in range(range_min):
+        if a[i] != b[i]:
+            break
+        count_shared += 1
+
+    count_nested = range_max - count_shared
+    # Negate shared so smaller is better.
+    # Less path nesting also gets priority.
+    return (-count_shared, count_nested)
+
+
+def eval_define(
+        value_literal: str,
+        *,
+        default: str,
+        filepath_ref_split: tuple[str, ...],
+) -> tuple[str, list[str]]:
+    failed: list[str] = []
+
+    def re_replace_fn(match: re.Match[str]) -> str:
+        value = match.group()
+        if value.isdigit():
+            return value
+
+        other_values = global_defines.get(value)
+        if other_values is None:
+            failed.append(value)
+            return value
+
+        if len(other_values) == 1:
+            other_filepath_split, other_literal = other_values[0]
+        else:
+            # Find the "closest" match on the file system.
+            # In practice, favoring paths which are co-located works fairly well,
+            # needed as it's not known which headers the ID's in a header *could* reference.
+            other_literal_best = ""
+            other_score_best = (0, 0)
+            other_filepath_split_best: tuple[str, ...] = ("",)
+
+            for other_filepath_split_test, other_literal_test in other_values:
+                other_score_test = path_score_distance(filepath_ref_split, other_filepath_split_test)
+                if (
+                        # First time.
+                        (not other_literal_best) or
+                        # A lower score has been found (smaller is better).
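+                        # (Note: tuples compare element-wise, so more shared leading
+                        # path components win first, then less nesting breaks ties.)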
+                        (other_score_test < other_score_best)
+                ):
+                    other_literal_best = other_literal_test
+                    other_score_best = other_score_test
+                    other_filepath_split_best = other_filepath_split_test
+                del other_score_test
+            other_literal = other_literal_best
+            other_filepath_split = other_filepath_split_best
+            del other_literal_best, other_score_best, other_filepath_split_best
+
+        other_literal_eval, other_failed = eval_define(
+            other_literal,
+            default="",
+            filepath_ref_split=other_filepath_split,
+        )
+        if other_literal_eval:
+            return other_literal_eval
+
+        # `failed.append(value)` is also valid, however reporting the nested failure
+        # is more likely to give insights into what went wrong.
+        failed.extend(other_failed)
+        return value
+
+    # Use integer division.
+    value_literal = value_literal.replace(r"/", r"//")
+
+    # Populates `failed`.
+    value_literal_eval = REGEX_ID_OR_NUMBER_C_LIKE.sub(re_replace_fn, value_literal)
+
+    if failed:
+        # One or more ID's could not be found.
+        return default, failed
+
+    # This could use exception handling; don't add it unless it's needed though.
+    # pylint: disable-next=eval-used
+    return str(eval(value_literal_eval)), failed
+
+
+def validate_sizes(filepath: str, data_src: str) -> None:
+    # Nicer for printing.
+    filepath_rel = os.path.relpath(filepath, BASE_DIR)
+    filepath_rel_split = tuple(filepath_rel.split(os.sep))
+
+    for m, line, (beg, end) in line_number_utils.finditer_with_line_numbers_and_bounds(
+            REGEX_SIZE_COMMENT_IN_ARRAY,
+            data_src,
+    ):
+        del end
+        value_id = m.group(1)
+        value_literal = m.group(2)
+
+        value_eval, lookups_failed = eval_define(
+            value_id,
+            default="",
+            filepath_ref_split=filepath_rel_split,
+        )
+
+        data_line_column = "{:s}:{:d}:{:d}:".format(
+            filepath_rel,
+            line + 1,
+            # Place the cursor after the `[`.
+            (m.start(0) + 1) - beg,
+        )
+
+        if len(value_id.strip()) != len(value_id):
+            print("WARN:", data_line_column, "comment includes white-space")
+            continue
+
+        if lookups_failed:
+            print("WARN:", data_line_column, "[{:s}]".format(", ".join(lookups_failed)), "unknown")
+            continue
+
+        if value_literal != value_eval:
+            print("WARN:", data_line_column, value_id, "mismatch", "({:s} != {:s})".format(value_literal, value_eval))
+            continue
+
+        if SHOW_SUCCESS:
+            print("OK: ", data_line_column, "{:s}={:s},".format(value_id, value_literal))
+
+    # Returning None indicates the file is not edited.
+
+
+def main() -> int:
+
+    # Extract defines.
+    run(
+        directories=[os.path.join(BASE_DIR, d) for d in SOURCE_DIRS],
+        is_text=lambda filepath: filepath.endswith(SOURCE_EXT),
+        text_operation=extract_defines,
+        # Can't be used if we want to accumulate in a global variable.
+        use_multiprocess=False,
+    )
+
+    # For predictable lookups on tie breakers.
+    # In practice it should almost never matter.
+    for values in global_defines.values():
+        if len(values) > 1:
+            values.sort()
+
+    # Validate sizes.
+    run(
+        directories=[os.path.join(BASE_DIR, d) for d in SOURCE_DIRS],
+        is_text=lambda filepath: filepath.endswith(SOURCE_EXT),
+        text_operation=validate_sizes,
+        # Can't be used if we want to accumulate in a global variable.
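+        # (Note: `validate_sizes` only reads `global_defines`, though with
+        # multi-processing the worker processes may not see the populated global.)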
+        use_multiprocess=False,
+    )
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/tools/utils_maintenance/modules/line_number_utils.py b/tools/utils_maintenance/modules/line_number_utils.py
new file mode 100644
index 00000000000..b25bda910f7
--- /dev/null
+++ b/tools/utils_maintenance/modules/line_number_utils.py
@@ -0,0 +1,88 @@
+# SPDX-FileCopyrightText: 2025 Blender Authors
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+"""
+When writing text checking utilities, it's not always straightforward
+to find line numbers and ranges from an offset within the text.
+
+This module provides helpers to do this efficiently.
+
+The main utility is ``finditer_with_line_numbers_and_bounds``,
+an alternative to ``re.finditer`` which also yields line numbers and the offsets
+of the line bounds, useful for scanning files and reporting errors that include the line contents.
+"""
+
+__all__ = (
+    "finditer_newline_cache_compute",
+    "finditer_with_line_numbers_and_bounds",
+    "line_to_offset_range",
+)
+
+from collections.abc import (
+    Iterator,
+)
+
+import re as _re
+
+
+def finditer_newline_cache_compute(text: str) -> tuple[dict[int, int], list[int]]:
+    """
+    Return a tuple containing:
+    a map from each newline's offset to the (0-based) number of the line that follows it &
+    a list mapping each line number to the offset at which that line begins
+    (the preceding newline's offset for every line but the first).
+    """
+    # Offset to line lookup.
+    offset_to_line_cache: dict[int, int] = {}
+    # Line to offset lookup.
+    line_to_offset_cache: list[int] = [0]
+
+    for i, m in enumerate(_re.finditer("\\n", text), 1):
+        ofs = m.start()
+        offset_to_line_cache[ofs] = i
+        line_to_offset_cache.append(ofs)
+
+    return offset_to_line_cache, line_to_offset_cache
+
+
+def finditer_with_line_numbers_and_bounds(
+        pattern: str | _re.Pattern[str],
+        text: str,
+        *,
+        offset_to_line_cache: dict[int, int] | None = None,
+        flags: int = 0,
+) -> Iterator[tuple[_re.Match[str], int, tuple[int, int]]]:
+    """
+    A version of ``re.finditer`` that yields ``(match, line_number, line_bounds)``.
+
+    Note that ``offset_to_line_cache`` is the first return value from
+    ``finditer_newline_cache_compute``.
+    This should be passed in if the iterator is called multiple times
+    on the same buffer, to avoid recalculating it every time.
+    """
+    if offset_to_line_cache is None:
+        offset_to_line_cache, line_to_offset_cache = finditer_newline_cache_compute(text)
+        del line_to_offset_cache
+
+    text_len = len(text)
+    for m in _re.finditer(pattern, text, flags):
+
+        if (beg := text.rfind("\n", 0, m.start())) == -1:
+            beg = 0
+            line_number = 0
+        else:
+            line_number = offset_to_line_cache[beg]
+
+        if (end := text.find("\n", m.end(), text_len)) == -1:
+            end = text_len
+
+        yield m, line_number, (beg, end)
+
+
+def line_to_offset_range(line: int, offset_limit: int, line_to_offset_cache: list[int]) -> tuple[int, int]:
+    """
+    Given a (0-based) line number, return the offset bounds of that line.
+    """
+    assert line >= 0
+    beg = line_to_offset_cache[line]
+    end = line_to_offset_cache[line + 1] if (line + 1 < len(line_to_offset_cache)) else offset_limit
+    return beg, end
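+
+
+# A minimal usage sketch (illustrative only, not exercised by the Blender tooling):
+# running this module directly scans a small buffer and prints each match with
+# its 0-based line number and the matched line's contents.
+if __name__ == "__main__":
+    _text = "alpha\nbeta\ngamma"
+    for _m, _line, (_beg, _end) in finditer_with_line_numbers_and_bounds("beta", _text):
+        # Prints "1 beta" (the bounds include the leading newline, hence the strip).
+        print(_line, _text[_beg:_end].strip())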