diff --git a/GNUmakefile b/GNUmakefile
index 058bb594d87..a55d3771094 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -59,6 +59,7 @@ Static Source Code Checking
 	* check_cppcheck:         Run blender source through cppcheck (C & C++).
 	* check_clang_array:      Run blender source through clang array checking script (C & C++).
+	* check_struct_comments:  Check struct member comments are correct (C & C++).
 	* check_deprecated:       Check if there is any deprecated code to remove.
 	* check_descriptions:     Check for duplicate/invalid descriptions.
 	* check_licenses:         Check license headers follow the SPDX license specification,
@@ -468,6 +469,13 @@ check_cppcheck: .FORCE
 		"$(BLENDER_DIR)/check_cppcheck.txt"
 	@echo "written: check_cppcheck.txt"
 
+check_struct_comments: .FORCE
+	@$(CMAKE_CONFIG)
+	@cd "$(BUILD_DIR)" ; \
+	$(PYTHON) \
+		"$(BLENDER_DIR)/build_files/cmake/cmake_static_check_clang.py" \
+		--checks=struct_comments --match=".*"
+
 check_clang_array: .FORCE
 	@$(CMAKE_CONFIG)
 	@cd "$(BUILD_DIR)" ; \
diff --git a/build_files/cmake/cmake_static_check_clang.py b/build_files/cmake/cmake_static_check_clang.py
new file mode 100644
index 00000000000..1572b59caef
--- /dev/null
+++ b/build_files/cmake/cmake_static_check_clang.py
@@ -0,0 +1,544 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: 2023 Blender Foundation
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+"""
+A command line utility to check Blender's source code with CLANG's Python module.
+
+To call this directly:
+
+export CLANG_LIB_DIR=/usr/lib64
+cd {BUILD_DIR}
+python ../blender/build_files/cmake/cmake_static_check_clang.py --match=".*" --checks=struct_comments
+
+"""
+
+import argparse
+import os
+import re
+import sys
+
+from typing import (
+    Any,
+    Dict,
+    List,
+    Type,
+    Sequence,
+    Tuple,
+)
+
+
+import project_source_info
+
+# pylint: disable-next=import-outside-toplevel
+import clang  # type: ignore
+# pylint: disable-next=import-outside-toplevel
+import clang.cindex  # type: ignore
+from clang.cindex import (
+    CursorKind,
+)
+
+# Only for readability.
+ClangNode = Any
+ClangTranslationUnit = Any
+ClangSourceLocation = Any
+
+
+USE_VERBOSE = os.environ.get("VERBOSE", None) is not None
+
+# Turn off for debugging.
+USE_MULTIPROCESS = True
+
+CLANG_BIND_DIR = os.environ.get("CLANG_BIND_DIR")
+CLANG_LIB_DIR = os.environ.get("CLANG_LIB_DIR")
+
+if CLANG_BIND_DIR is None:
+    print("$CLANG_BIND_DIR python binding dir not set")
+if CLANG_LIB_DIR is None:
+    print("$CLANG_LIB_DIR clang lib dir not set")
+
+if CLANG_LIB_DIR:
+    clang.cindex.Config.set_library_path(CLANG_LIB_DIR)
+if CLANG_BIND_DIR:
+    sys.path.append(CLANG_BIND_DIR)
+
+
+CHECKER_IGNORE_PREFIX = [
+    "extern",
+]
+
+CHECKER_EXCLUDE_SOURCE_FILES = set(os.path.join(*f.split("/")) for f in (
+    # Skip parsing these large (mostly data) files.
+    "source/blender/editors/space_text/text_format_pov.cc",
+    "source/blender/editors/space_text/text_format_pov_ini.cc",
+))
+
+
+# -----------------------------------------------------------------------------
+# Utility Functions
+
+def clang_source_location_as_str(source_location: ClangSourceLocation) -> str:
+    return "{:s}:{:d}:{:d}:".format(str(source_location.file), source_location.line, source_location.column)
+
+
+# -----------------------------------------------------------------------------
+# Checkers
+
+class ClangChecker:
+    """
+    Base class for checkers.
+
+    Notes:
+
+    - The function ``check_source`` takes file_data as bytes instead of a string
+      because the offsets provided by CLANG are byte offsets.
+      While the offsets could be converted into UNICODE offsets,
+      there doesn't seem to be an efficient & convenient way to do that.
+    """
+    __slots__ = ()
+
+    def __new__(cls, *args: Tuple[Any], **kwargs: Dict[str, Any]) -> Any:
+        raise RuntimeError("%s should not be instantiated" % cls)
+
+    @staticmethod
+    def check_source(
+            _filepath: str,
+            _file_data: bytes,
+            _tu: ClangTranslationUnit,
+            _shared_check_data: Any,
+    ) -> List[str]:
+        raise RuntimeError("This function must be overridden by its subclass!")
+        return []
+
+    @staticmethod
+    def setup() -> Any:
+        return None
+
+    @staticmethod
+    def teardown(_shared_check_data: Any) -> None:
+        pass
+
+
+class clang_checkers:
+    # Fake module.
+
+    class struct_comments(ClangChecker):
+        """
+        Ensure comments in struct declarations match the members of the struct, e.g.:
+
+        SomeStruct var = {
+            /*name*/ "Text",
+            /*children*/ nullptr,
+            /*flag*/ 0,
+        };
+
+        Will generate a warning if any of the names in the prefix comments don't match the struct member names.
+        """
+
+        _struct_comments_ignore = {
+            # `PyTypeObject` uses compile-time members that vary (see: #PyVarObject_HEAD_INIT macro).
+            # While some clever comment syntax could be supported to signify multiple/optional members,
+            # this is such a specific case that it's simpler to skip this warning.
+            "PyTypeObject": {"ob_base": {"ob_size"}},
+        }
+
+        @staticmethod
+        def _struct_check_comments_recursive(
+                # Static (unchanged for each recursion).
+                filepath: str,
+                file_data: bytes,
+                # Different for each recursion.
+                node: ClangNode,
+                node_parent: ClangNode,
+                level: int,
+                # Used to build data.
+                struct_decl_map: Dict[str, ClangNode],
+                struct_type_map: Dict[str, str],
+                output: List[str],
+        ) -> None:
+
+            # Needed to read back the node.
+            if USE_VERBOSE:
+                print("TRY:", node.kind, node.spelling, len(list(node.get_tokens())), level, node.location)
+
+            # if node.kind == CursorKind.VAR_DECL and node.spelling == "Vector_NumMethods":
+            #     import IPython
+            #     IPython.embed()
+
+            if node.kind == CursorKind.STRUCT_DECL:
+                struct_type = node.spelling.strip()
+                if not struct_type:
+                    # The parent may be a `typedef [..] TypeID` where `[..]` is `struct { a; b; c; }`.
+                    # Inspect the parent.
+                    if node_parent is not None and (node_parent.kind == CursorKind.TYPEDEF_DECL):
+                        tokens = list(node_parent.get_tokens())
+                        if tokens[0].spelling == "typedef":
+                            struct_type = tokens[-1].spelling
+
+                struct_decl_map[struct_type] = node
+
+            # Ignore declarations for anything defined outside this file.
+            if str(node.location.file) == filepath:
+                if node.kind == CursorKind.INIT_LIST_EXPR:
+                    if USE_VERBOSE:
+                        print(node.spelling, node.location)
+                    # Split to avoid `const struct` .. and similar.
+                    # NOTE: there may be an array size suffix, e.g. `[4]`.
+                    # This could be supported.
+                    struct_type = node.type.spelling.split()[-1]
+                    struct = struct_decl_map.get(struct_type)
+                    if struct is None:
+                        if USE_VERBOSE:
+                            print("NOT FOUND:", struct_type)
+                        struct_type = struct_type_map.get(struct_type)
+                        if struct_type is not None:
+                            struct = struct_decl_map.get(struct_type)
+
+                    if USE_VERBOSE:
+                        print("INSPECTING STRUCT:", struct_type)
+                    if struct is not None:
+                        member_names = [
+                            node_child.spelling for node_child in struct.get_children()
+                            if node_child.kind == CursorKind.FIELD_DECL
+                        ]
+                        # if struct_type == "PyMappingMethods":
+                        #     import IPython
+                        #     IPython.embed()
+
+                        children = list(node.get_children())
+                        comment_names = []
+
+                        # Set to true when there is a comment directly before a value,
+                        # which is needed because:
+                        # - Comments on the previous line are rarely intended to be identifiers of the struct member.
+                        # - Comments which _are_ intended to be identifiers can be wrapped onto new-lines
+                        #   so they should not be ignored.
+                        #
+                        # While it's possible every member is wrapped onto a new-line,
+                        # this is highly unlikely.
+                        comment_names_prefix_any = False
+
+                        for node_child in children:
+                            # Extract the content before the child
+                            # (typically a C-style comment containing the struct member).
+                            end = min(node_child.location.offset, len(file_data))
+
+                            # It's possible this ID has a preceding "name::space::etc"
+                            # which should be skipped.
+                            while end > 0 and ((ch := bytes((file_data[end - 1],))).isalpha() or ch == b":"):
+                                end -= 1
+
+                            has_newline = False
+                            while end > 0:
+                                ch = bytes((file_data[end - 1],))
+                                if ch in {b"\t", b" "}:
+                                    end -= 1
+                                elif ch == b"\n":
+                                    end -= 1
+                                    has_newline = True
+                                else:
+                                    break
+
+                            beg = end - 1
+                            while beg != 0 and bytes((file_data[beg],)) != b"\n":
+                                beg -= 1
+                            text = file_data[beg:end]
+                            if text.lstrip().startswith(b"/*"):
+                                if not has_newline:
+                                    comment_names_prefix_any = True
+                            else:
+                                text = b""
+                            comment_names.append(text.decode('utf-8'))
+
+                        if USE_VERBOSE:
+                            print(member_names)
+                            print(comment_names)
+
+                        total = min(len(member_names), len(comment_names))
+
+                        if total != 0 and comment_names_prefix_any:
+                            result = [""] * total
+                            count_found = 0
+                            count_invalid = 0
+                            for i in range(total):
+                                comment = comment_names[i]
+                                if "/*" in comment and "*/" in comment:
+                                    comment = comment.strip().strip("/").strip("*")
+                                    if comment == member_names[i]:
+                                        count_found += 1
+                                    else:
+                                        suppress_warning = False
+                                        if (
+                                                skip_members_table :=
+                                                clang_checkers.struct_comments._struct_comments_ignore.get(
+                                                    node_parent.type.spelling,
+                                                )
+                                        ) is not None:
+                                            if (skip_members := skip_members_table.get(comment)) is not None:
+                                                if member_names[i] in skip_members:
+                                                    suppress_warning = True
+
+                                        if not suppress_warning:
+                                            result[i] = "Incorrect! found \"{:s}\" expected \"{:s}\"".format(
+                                                comment, member_names[i])
+                                            count_invalid += 1
+                                else:
+                                    result[i] = "No comment for \"{:s}\"".format(member_names[i])
+                            if count_found == 0 and count_invalid == 0:
+                                # No comments used, skip this as not all declarations use this comment style.
+                                output.append(
+                                    "NONE: {:s} {:s}".format(
+                                        clang_source_location_as_str(node.location),
+                                        node.type.spelling,
+                                    )
+                                )
+                            elif count_found != total:
+                                for i in range(total):
+                                    if result[i]:
+                                        output.append(
+                                            "FAIL: {:s} {:s}".format(
+                                                clang_source_location_as_str(children[i].location),
+                                                result[i],
+                                            )
+                                        )
+                            else:
+                                output.append(
+                                    "OK: {:s} {:s}".format(
+                                        clang_source_location_as_str(node.location),
+                                        node.type.spelling,
+                                    )
+                                )
+
+            for node_child in node.get_children():
+                clang_checkers.struct_comments._struct_check_comments_recursive(
+                    filepath, file_data,
+                    node_child, node, level + 1,
+                    struct_decl_map, struct_type_map, output,
+                )
+
+        @staticmethod
+        def check_source(
+                filepath: str,
+                file_data: bytes,
+                tu: ClangTranslationUnit,
+                _shared_check_data: Any) -> List[str]:
+            output: List[str] = []
+
+            struct_decl_map: Dict[str, Any] = {}
+            struct_type_map: Dict[str, str] = {}
+            clang_checkers.struct_comments._struct_check_comments_recursive(
+                filepath, file_data,
+                tu.cursor, None, 0,
+                struct_decl_map, struct_type_map, output,
+            )
+
+            return output
+
+
+# -----------------------------------------------------------------------------
+# Checker Class Access
+
+def check_function_get_all() -> List[str]:
+    checkers = []
+    for name in dir(clang_checkers):
+        value = getattr(clang_checkers, name)
+        if isinstance(value, type) and issubclass(value, ClangChecker):
+            checkers.append(name)
+    checkers.sort()
+    return checkers
+
+
+def check_class_from_id(name: str) -> Type[ClangChecker]:
+    result = getattr(clang_checkers, name)
+    assert issubclass(result, ClangChecker)
+    # MYPY 0.812 doesn't recognize the assert above.
+    return result  # type: ignore
+
+
+def check_docstring_from_id(name: str) -> str:
+    from textwrap import dedent
+    result = getattr(clang_checkers, name).__doc__
+    return dedent(result or '').strip('\n') + '\n'
+
+
+# -----------------------------------------------------------------------------
+# Generic Clang Checker
+
+def check_source_file(
+        filepath: str,
+        args: Sequence[str],
+        check_ids: Sequence[str],
+        shared_check_data_foreach_check: Sequence[Any],
+) -> str:
+    index = clang.cindex.Index.create()
+    try:
+        tu = index.parse(filepath, args)
+    except clang.cindex.TranslationUnitLoadError as ex:
+        return "PARSE_ERROR: {:s} {!r}".format(filepath, ex)
+
+    with open(filepath, "rb") as fh:
+        file_data = fh.read()
+
+    output: List[str] = []
+
+    # We don't really care what we are looking at, simply run each of the
+    # requested checkers over the entire translation unit.
+    for check, shared_check_data in zip(check_ids, shared_check_data_foreach_check):
+        cls = check_class_from_id(check)
+        output.extend(cls.check_source(filepath, file_data, tu, shared_check_data))
+
+    if not output:
+        return ""
+    return "\n".join(output)
+
+
+def check_source_file_for_imap(args: Tuple[str, Sequence[str], Sequence[str], Sequence[Any]]) -> str:
+    return check_source_file(*args)
+
+
+def source_info_filter(
+        source_info: List[Tuple[str, List[str], List[str]]],
+        regex_list: Sequence[re.Pattern[str]],
+) -> List[Tuple[str, List[str], List[str]]]:
+    source_dir = project_source_info.SOURCE_DIR
+    if not source_dir.endswith(os.sep):
+        source_dir += os.sep
+    source_info_result = []
+    for item in source_info:
+        filepath_source = item[0]
+        if filepath_source.startswith(source_dir):
+            filepath_source_relative = filepath_source[len(source_dir):]
+            if filepath_source_relative in CHECKER_EXCLUDE_SOURCE_FILES:
+                CHECKER_EXCLUDE_SOURCE_FILES.remove(filepath_source_relative)
+                continue
+            if filepath_source_relative.startswith("intern" + os.sep + "ghost"):
+                pass
+            elif filepath_source_relative.startswith("source" + os.sep):
+                pass
+            else:
+                continue
+
+        has_match = False
+        for regex in regex_list:
+            if regex.match(filepath_source_relative) is not None:
+                has_match = True
+        if not has_match:
+            continue
+
+        source_info_result.append(item)
+
+    if CHECKER_EXCLUDE_SOURCE_FILES:
+        sys.stderr.write(
+            "Error: exclude file(s) are missing: {!r}\n".format(list(sorted(CHECKER_EXCLUDE_SOURCE_FILES)))
+        )
+        sys.exit(1)
+
+    return source_info_result
+
+
+def run_checks_on_project(
+        check_ids: Sequence[str],
+        regex_list: Sequence[re.Pattern[str]],
+) -> None:
+    source_info = project_source_info.build_info(ignore_prefix_list=CHECKER_IGNORE_PREFIX)
+    source_defines = project_source_info.build_defines_as_args()
+
+    # Apply exclusion.
+    source_info = source_info_filter(source_info, regex_list)
+
+    shared_check_data_foreach_check = [
+        check_class_from_id(check).setup() for check in check_ids
+    ]
+
+    all_args = []
+    index = 0
+    for filepath_source, inc_dirs, defs in source_info[index:]:
+        args = (
+            [("-I" + i) for i in inc_dirs] +
+            [("-D" + d) for d in defs] +
+            source_defines
+        )
+
+        all_args.append((filepath_source, args, check_ids, shared_check_data_foreach_check))
+
+    import multiprocessing
+
+    if USE_MULTIPROCESS:
+        job_total = multiprocessing.cpu_count() + 1
+        with multiprocessing.Pool(processes=job_total) as pool:
+            # No `istarmap`, use an intermediate function.
+            for result in pool.imap(check_source_file_for_imap, all_args):
+                if result:
+                    print(result)
+    else:
+        for (filepath_source, args, _check_ids, shared_check_data_foreach_check) in all_args:
+            result = check_source_file(filepath_source, args, check_ids, shared_check_data_foreach_check)
+            if result:
+                print(result)
+
+    for (check, shared_check_data) in zip(check_ids, shared_check_data_foreach_check):
+        check_class_from_id(check).teardown(shared_check_data)
+
+
+def create_parser(checkers_all: Sequence[str]) -> argparse.ArgumentParser:
+    from textwrap import indent
+
+    # Create doc-string for checks.
+    checks_all_docs = []
+    for checker in checkers_all:
+        # `%` -> `%%` is needed for `--help` not to interpret these as formatting arguments.
+ checks_all_docs.append( + " %s\n%s" % ( + checker, + indent(check_docstring_from_id(checker).replace("%", "%%"), ' '), + ) + ) + + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawTextHelpFormatter, + ) + parser.add_argument( + "--match", + nargs='+', + required=True, + metavar="REGEX", + help="Match file paths against this expression", + ) + parser.add_argument( + "--checks", + dest="checks", + help=( + "Specify the check presets to run.\n\n" + + "\n".join(checks_all_docs) + "\n" + "Multiple checkers may be passed at once (comma separated, no spaces)."), + required=True, + ) + + return parser + + +# ----------------------------------------------------------------------------- +# Main Function + +def main() -> int: + checkers_all = check_function_get_all() + parser = create_parser(checkers_all) + args = parser.parse_args() + + regex_list = [] + + for expr in args.match: + try: + regex_list.append(re.compile(expr)) + except Exception as ex: + print("Error in expression: \"{:s}\"\n {!r}".format(expr, ex)) + return 1 + + run_checks_on_project(args.checks.split(','), regex_list) + return 0 + + +if __name__ == "__main__": + sys.exit(main())
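
For reviewers, a minimal sketch of driving the new checker on a single file, bypassing the
multiprocessing front-end in `run_checks_on_project()`. This is not part of the patch: the source
path and include flag below are hypothetical placeholders, while the entry points used
(`check_class_from_id()`, `check_source_file()`) are the ones the script above defines. It assumes
`CLANG_LIB_DIR`/`CLANG_BIND_DIR` are configured and the script is run from a build directory where
`project_source_info` is importable, as described in the module docstring:

    import sys
    sys.path.append("/path/to/blender/build_files/cmake")  # Hypothetical checkout location.

    import cmake_static_check_clang as static_check

    # `struct_comments.setup()` currently returns None (inherited from ClangChecker),
    # but calling it keeps this sketch correct if setup data is added later.
    shared_data = static_check.check_class_from_id("struct_comments").setup()

    # Returns an empty string when clean, otherwise newline-separated
    # "OK:"/"NONE:"/"FAIL:" report lines.
    report = static_check.check_source_file(
        "/path/to/blender/source/blender/editors/space_text/text_format_py.cc",  # Hypothetical file.
        ["-I/path/to/blender/source/blender/makesdna"],  # Hypothetical compiler flags.
        ["struct_comments"],
        [shared_data],
    )
    print(report if report else "No struct-comment issues found.")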