Files
test2/tools/check_source/check_licenses.py
Campbell Barton 0148293520 License headers: add SPDX licenses for '*.glsl' files
When GLSL sources were first included in Blender they were treated as
data (like blend files) and had no license header.
Since then GLSL has been used for more sophisticated features
(EEVEE & real-time compositing)
where it makes sense to include licensing information.

Add SPDX copyright headers to *.glsl files, matching headers used for
C/C++, also include GLSL files in the license checking script.

As leading C-comments are now stripped,
added binary size of comments is no longer a concern.

Ref !111247
2023-08-24 10:57:03 +10:00

593 lines
19 KiB
Python

#!/usr/bin/env python3
# SPDX-FileCopyrightText: 2022-2023 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
"""
Check license headers follow the SPDX spec
https://spdx.org/licenses/
This can be activated by calling "make check_licenses" from Blender's root directory.
"""
import os
import argparse
import datetime
import re
from dataclasses import dataclass
from typing import (
Callable,
Dict,
Generator,
List,
Tuple,
)
# -----------------------------------------------------------------------------
# Constants

# Add one, maybe someone runs this on new-years in another timezone or so.
YEAR_MAX = datetime.date.today().year + 1
# Let's not worry about software written before this time.
YEAR_MIN = 1950
# Inclusive range of years considered plausible copyright years.
YEAR_RANGE = range(YEAR_MIN, YEAR_MAX + 1)

# Faster, but makes exceptions and errors more difficult to troubleshoot.
USE_MULTIPROCESS = False

# Only search for SPDX identifiers within this many leading characters of each file.
EXPECT_SPDX_IN_FIRST_CHARS = 1024

# Show unique headers after modifying them.
# Useful when reviewing changes as there may be many duplicates.
REPORT_UNIQUE_HEADER_MAPPING = False

# Maps anonymized (year-stripped) header text to the files that use it,
# only populated when `REPORT_UNIQUE_HEADER_MAPPING` is enabled.
mapping: Dict[str, List[str]] = {}

# Blender's source root: two directories up from this script.
SOURCE_DIR = os.path.normpath(
    os.path.abspath(
        os.path.normpath(os.path.join(os.path.dirname(__file__), "..", ".."))
    )
)

# The file listing every license identifier accepted in the code-base.
SPDX_IDENTIFIER_FILE = os.path.join(
    SOURCE_DIR, "doc", "license", "SPDX-license-identifiers.txt"
)

# Statistics key used for files with no recognized license.
SPDX_IDENTIFIER_UNKNOWN = "*Unknown License*"

# Read the acceptable license identifiers (first word of lines referencing spdx.org).
with open(SPDX_IDENTIFIER_FILE, "r", encoding="utf-8") as fh:
    ACCEPTABLE_LICENSES = set(line.split()[0] for line in sorted(fh) if "https://spdx.org/licenses/" in line)
del fh

# -----------------------------------------------------------------------------
# Global Variables

# Count how many licenses are used.
SPDX_IDENTIFIER_STATS: Dict[str, int] = {SPDX_IDENTIFIER_UNKNOWN: 0}
# -----------------------------------------------------------------------------
# File Type Checks

# Files using `/* .. */` style comments.
def filename_is_c_compat(filename: str) -> bool:
    """Return True when ``filename`` has a C-compatible source extension."""
    # Grouped by language family, all of these use C-style block comments.
    c_suffixes = (".c", ".h")
    cxx_suffixes = (".cc", ".cxx", ".cpp", ".hh", ".hxx", ".hpp", ".inl")
    objc_suffixes = (".m", ".mm")
    shader_suffixes = (
        ".glsl",   # OpenGL Shading Language.
        ".cl",     # OpenCL.
        ".cu",     # CUDA.
        ".metal",  # Metal.
        ".msl",    # Metal Shading Language.
        ".osl",    # Open Shading Language.
    )
    misc_suffixes = (".tables",)  # Cycles uses this extension.
    return filename.endswith(
        c_suffixes + cxx_suffixes + objc_suffixes + shader_suffixes + misc_suffixes
    )
def filename_is_cmake(filename: str) -> bool:
    """Return True when ``filename`` is a CMake file."""
    for suffix in ("CMakeLists.txt", ".cmake"):
        if filename.endswith(suffix):
            return True
    return False
# Files using '#' style comments.
def filename_is_script_compat(filename: str) -> bool:
    """Return True when ``filename`` uses '#' style comments (scripts & make-files)."""
    script_suffixes = (".py", ".sh", "GNUmakefile")
    return any(filename.endswith(suffix) for suffix in script_suffixes)
# -----------------------------------------------------------------------------
# Cursor Motion

def txt_next_line_while_fn(text: str, index: int, fn: Callable[[str], bool]) -> int:
    """
    Advance ``index`` line by line while ``fn`` succeeds on each line,
    returning the beginning of the first line where ``fn`` fails (or EOF).
    """
    text_len = len(text)
    while index < text_len:
        line_beg = index
        line_end = text.find("\n", line_beg)
        if line_end == -1:
            line_end = text_len
        if not fn(text[line_beg:line_end]):
            # Remain at the start of the line that failed the predicate.
            return line_beg
        # Step over the newline onto the next line.
        index = line_end + 1
    return index
def txt_next_eol(text: str, pos: int, limit: int, step_over: bool) -> int:
    """
    Return the position just before the next end-of-line (or EOF),
    optionally stepping over the newline when ``step_over`` is true.
    Intended for use as a range end: the result points at a newline
    or is out of range (``len(text)``).
    """
    # Guard: at or past the final character, nothing to extend.
    if pos + 1 >= len(text):
        return pos
    offset = 1 if step_over else 0
    # Already sitting on a newline.
    if text[pos] == "\n":
        return pos + offset
    found = text.find("\n", pos, limit)
    # No newline before `limit`: clamp to the limit.
    return limit if found == -1 else found + offset
def txt_prev_bol(text: str, pos: int, limit: int) -> int:
    """
    Return the beginning of the line ``pos`` is on,
    never scanning back before ``limit``.
    """
    # Guard: already at the very start of the text.
    if pos == 0:
        return pos
    # The previous character is a newline: `pos` is already a line start.
    if text[pos - 1] == "\n":
        return pos
    found = text.rfind("\n", limit, pos)
    # No newline found: clamp to the limit,
    # otherwise step past the newline so it's excluded.
    return limit if found == -1 else found + 1
def txt_anonymous_years(text: str) -> str:
    """
    Replace years with ``#`` place-holders, since headers differing only
    by year should not be considered unique.
    """

    def range_to_hashes(match: re.Match[str]) -> str:
        beg, end = match.groups()
        # NOTE: the whole range collapses to the width of its first year.
        if int(beg) in YEAR_RANGE and int(end) in YEAR_RANGE:
            return '#' * len(beg)
        return match.group()

    def year_to_hashes(match: re.Match[str]) -> str:
        year = match.groups()[0]
        if int(year) in YEAR_RANGE:
            return '#' * len(year)
        return match.group()

    # First collapse year ranges (`2005-2009` -> `####`),
    # then any remaining single years (`2005` -> `####`).
    text = re.sub(r'([0-9]+)-([0-9]+)', range_to_hashes, text)
    text = re.sub(r'([0-9]+)', year_to_hashes, text)
    return text
def txt_find_next_indented_block(text: str, find: str, pos: int, limit: int) -> Tuple[int, int]:
    """
    Support for finding an indented block of text.
    Return the identifier index and the end of the block,
    or ``(-1, -1)`` when ``find`` is not present in ``text[pos:limit]``.

    Where searching for ``SPDX-FileCopyrightText: ``

    .. code-block::

       # SPDX-FileCopyrightText: 2020 Name
         ^ begin                          ^ end.

    With multiple lines supported:

    .. code-block::

       # SPDX-FileCopyrightText: 2020 Name
       #                         2021 Another Name
         ^ begin (one line up)                    ^ end.
    """
    pos_found = text.find(find, pos, limit)
    if pos_found == -1:
        # The identifier does not occur in the searched range.
        return (-1, -1)
    # End of the identifier's own line (start of the potential continuation).
    pos_next = txt_next_eol(text, pos_found, limit - 1, False) + 1
    if pos_next != limit:
        # Column of the identifier, i.e. the width of the leading comment chars.
        pos_found_indent = pos_found - txt_prev_bol(text, pos_found, 0)
        while True:
            # Step over leading comment chars.
            pos_next_test = pos_next + pos_found_indent
            pos_next_step = pos_next_test + len(find)
            # The next lines text is indented.
            # A continuation line must have only white-space where the
            # identifier would be (and must not be cut short by EOF).
            text_indent = text[pos_next_test:pos_next_step]
            if (len(text_indent) == pos_next_step - pos_next_test) and (not text[pos_next_test:pos_next_step].strip()):
                # Extend the block to the end of this continuation line.
                pos_next = txt_next_eol(text, pos_next_step, limit - 1, step_over=False) + 1
            else:
                break
    return (pos_found, pos_next)
# -----------------------------------------------------------------------------
# License Checker

def check_contents(filepath: str, text: str) -> None:
    """
    Check for license text, e.g: ``SPDX-License-Identifier: GPL-2.0-or-later``

    Intentionally be strict here... no extra spaces, no trailing space at the end of line etc.
    As there is no reason to be sloppy in this case.

    Problems found are printed (not raised); statistics are accumulated
    into the module-level ``SPDX_IDENTIFIER_STATS``.
    """
    text_header = text[:EXPECT_SPDX_IN_FIRST_CHARS]

    # Use the license to limit the copyright search,
    # so code-generation that includes copyright headers don't cause false alarms.
    license_id = " SPDX-License-Identifier: "
    license_id_beg = text_header.find(license_id)
    if license_id_beg == -1:
        # Allow completely empty files (sometimes `__init__.py`).
        if not text.rstrip():
            return
        # Empty file already accounted for.
        print("Missing {:s}{:s}".format(license_id, filepath))
        return

    # Check copyright text, reading multiple (potentially multi-line indented) blocks.
    copyright_id = " SPDX-FileCopyrightText: "
    copyright_id_step = 0
    copyright_id_beg = -1
    copyright_id_end = -1
    while ((copyright_id_item := txt_find_next_indented_block(
            text_header,
            copyright_id,
            copyright_id_step,
            license_id_beg,
    )) != (-1, -1)):
        if copyright_id_end == -1:
            # Set once.
            copyright_id_beg = copyright_id_item[0]
        else:
            # Consecutive copyright blocks must not be separated by blank lines.
            lines = text_header[copyright_id_end:copyright_id_item[0]].count("\n")
            if lines != 0:
                print(
                    "Expected no blank lines, found {:d} between \"{:s}\": {:s}".format(
                        lines,
                        copyright_id,
                        filepath,
                    ))
        copyright_id_end = copyright_id_item[1]
        copyright_id_step = copyright_id_end
    del copyright_id_item, copyright_id_step

    if copyright_id_beg == -1:
        print("Missing {:s}{:s}".format(copyright_id, filepath))
        # Maintain statistics.
        SPDX_IDENTIFIER_STATS[SPDX_IDENTIFIER_UNKNOWN] += 1
        return

    # Check for blank lines:
    blank_lines = text[:copyright_id_beg].count("\n")
    if filename_is_script_compat(filepath):
        # A shebang line is allowed before the copyright.
        if blank_lines > 0 and text.startswith("#!/"):
            blank_lines -= 1
    if blank_lines > 0:
        print("SPDX \"{:s}\" not on first line: {:s}".format(copyright_id, filepath))

    # Leading char.
    # The comment characters preceding the license identifier on its line.
    leading_char = text_header[txt_prev_bol(text_header, license_id_beg, 0):license_id_beg].strip()
    # Exactly one (comment-only) blank line must separate copyright & license.
    text_blank_line = text_header[copyright_id_end:license_id_beg]
    if (text_blank_line.count("\n") != 1) or (text_blank_line.replace(leading_char, "").strip() != ""):
        print("Expected blank line between \"{:s}\" & \"{:s}\": {:s}".format(copyright_id, license_id, filepath))
    del text_blank_line, leading_char

    license_id_end = license_id_beg + len(license_id)
    line_end = txt_next_eol(text, license_id_end, len(text), step_over=False)
    license_text = text[license_id_end:line_end]
    # For C/C++ comments.
    license_text = license_text.rstrip("*/")
    # Validate each identifier in a possibly compound expression,
    # e.g. `Apache-2.0 OR MIT`, skipping the combining keywords.
    for license_id in license_text.split():
        if license_id in {"AND", "OR"}:
            continue
        if license_id not in ACCEPTABLE_LICENSES:
            print(
                "Unexpected:",
                "{:s}:{:d}".format(filepath, text[:license_id_beg].count("\n") + 1),
                "contains license",
                repr(license_text),
                "not in",
                SPDX_IDENTIFIER_FILE,
            )
        try:
            SPDX_IDENTIFIER_STATS[license_id] += 1
        except KeyError:
            SPDX_IDENTIFIER_STATS[license_id] = 1

    if REPORT_UNIQUE_HEADER_MAPPING:
        # Extract the whole comment block surrounding the license
        # so unique headers can be listed at the end of the run.
        if filename_is_c_compat(filepath):
            comment_beg = text.rfind("/*", 0, license_id_beg)
            if comment_beg == -1:
                print("Comment Block:", filepath, "failed to find comment start")
                return
            comment_end = text.find("*/", license_id_end, len(text))
            if comment_end == -1:
                print("Comment Block:", filepath, "failed to find comment end")
                return
            comment_end += 2
            comment_block = text[comment_beg + 2: comment_end - 2]
            comment_block = "\n".join(
                [line.removeprefix(" *") for line in comment_block.split("\n")]
            )
        elif filename_is_script_compat(filepath) or filename_is_cmake(filepath):
            comment_beg = txt_prev_bol(text, license_id_beg, 0)
            comment_end = txt_next_eol(text, license_id_beg, len(text), step_over=False)
            # Expand to the surrounding run of '#' comment lines
            # (excluding any shebang).
            comment_beg = txt_next_line_while_fn(
                text,
                comment_beg,
                lambda line: line.startswith("#") and not line.startswith("#!/"),
            )
            comment_end = txt_next_line_while_fn(
                text,
                comment_end,
                lambda line: line.startswith("#"),
            )
            comment_block = text[comment_beg:comment_end].rstrip()
            comment_block = "\n".join(
                [line.removeprefix("# ") for line in comment_block.split("\n")]
            )
        else:
            raise Exception("Unknown file type: {:s}".format(filepath))

        # Group by the header text with years stripped.
        mapping.setdefault(txt_anonymous_years(comment_block), []).append(filepath)
def report_statistics() -> None:
    """
    Report some final statistics of license usage,
    accumulated in ``SPDX_IDENTIFIER_STATS`` by the checking pass.
    """
    print("")
    files_total = sum(SPDX_IDENTIFIER_STATS.values())
    files_unknown = SPDX_IDENTIFIER_STATS[SPDX_IDENTIFIER_UNKNOWN]

    # Guard against division by zero when no files were checked at all
    # (e.g. when every path was excluded).
    if files_total == 0:
        print("License Statistics: no files were checked!")
        return

    files_percent = (1.0 - (files_unknown / files_total)) * 100.0

    title = "License Statistics in {:,d} Files, {:.2f}% Complete".format(files_total, files_percent)
    print("#" * len(title))
    print(title)
    print("#" * len(title))
    print("")

    # Pad license names so the counts form a right-aligned column.
    max_length = max(len(k) for k in SPDX_IDENTIFIER_STATS.keys())
    print(" License:" + (" " * (max_length - 7)) + "Files:")
    print("")
    items = [(k, "{:,d}".format(v)) for k, v in sorted(SPDX_IDENTIFIER_STATS.items())]
    v_max = max(len(v) for _, v in items)
    for k, v in items:
        # Skip licenses that were never encountered.
        if v == "0":
            continue
        print("-", k + " " * (max_length - len(k)), (" " * (v_max - len(v))) + v)
    print("")
# -----------------------------------------------------------------------------
# Main Function & Source Listing

# The operation applied to each file's contents by `operation_wrap`.
operation = check_contents
def source_files(
        path: str,
        paths_exclude: Tuple[str, ...],
        filename_test: Callable[[str], bool],
) -> Generator[str, None, None]:
    """
    Yield file paths under ``path`` which pass ``filename_test``,
    skipping hidden files/directories and any ``paths_exclude`` entries.
    Raises when an excluded path does not exist or is never encountered.
    """
    # Split paths into directories & files (validating each exists).
    exclude_dirs = []
    exclude_files = []
    for p in paths_exclude:
        if not os.path.exists(p):
            raise Exception("File {!r} doesn't exist!".format(p))
        (exclude_dirs if os.path.isdir(p) else exclude_files).append(p)
    del paths_exclude

    # Normalized forms: exact directory matches & prefix matches.
    dirs_exclude_set = {p.rstrip("/") for p in exclude_dirs}
    dirs_exclude_prefix = tuple(p.rstrip("/") + "/" for p in exclude_dirs)
    files_exclude_set = {p.rstrip("/") for p in exclude_files}
    del exclude_dirs, exclude_files

    for dirpath, dirnames, filenames in os.walk(path):
        # Prune hidden directories in-place so `os.walk` never descends into them.
        dirnames[:] = [d for d in dirnames if not d.startswith(".")]
        if dirpath in dirs_exclude_set or dirpath.startswith(dirs_exclude_prefix):
            continue
        for filename in filenames:
            if filename.startswith("."):
                continue
            filepath = os.path.join(dirpath, filename)
            if filepath in files_exclude_set:
                # Track which file exclusions were actually used.
                files_exclude_set.remove(filepath)
                continue
            if filename_test(filename):
                yield filepath

    # Any leftover exclusions point at stale configuration.
    if files_exclude_set:
        raise Exception("Excluded paths not found: {!r}".format(repr(tuple(sorted(files_exclude_set)))))
def operation_wrap(filepath: str) -> None:
    """
    Read ``filepath`` as UTF-8 and run the module-level ``operation`` on its text.
    Read/decode failures are reported and skipped rather than raised.
    """
    with open(filepath, "r", encoding="utf-8") as fh:
        try:
            text = fh.read()
        except Exception as ex:
            # e.g. invalid UTF-8: report & skip this file.
            print("Failed to read", filepath, "with", repr(ex))
            return
        operation(filepath, text)
def argparse_create() -> argparse.ArgumentParser:
    """
    Create the command-line argument parser
    (when ``--help`` or invalid args are given, argparse prints usage).
    """
    # When --help or no args are given, print this help
    description = __doc__
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--show-headers",
        dest="show_headers",
        # NOTE: previously `type=bool` was used, which treats ANY non-empty
        # string (including "False") as true; `store_true` is the correct
        # way to expose an on/off flag.
        action="store_true",
        default=False,
        required=False,
        help="Show unique headers (useful for spotting irregularities).",
    )
    return parser
def main() -> None:
    """
    Entry point: run each per-file-type checking pass over the source tree,
    then report license statistics (and optionally the unique header listing).
    """
    global REPORT_UNIQUE_HEADER_MAPPING
    args = argparse_create().parse_args()
    REPORT_UNIQUE_HEADER_MAPPING = args.show_headers

    # Ensure paths are relative to the root, no matter where this script runs from.
    os.chdir(SOURCE_DIR)

    @dataclass
    class Pass:
        """One checking pass: a filename predicate plus include/exclude paths."""
        # Predicate selecting the files this pass applies to.
        filename_test: Callable[[str], bool]
        # Directories to search (relative to the source root).
        source_paths_include: Tuple[str, ...]
        # Directories & individual files to skip.
        source_paths_exclude: Tuple[str, ...]

    passes = (
        Pass(
            filename_test=filename_is_c_compat,
            source_paths_include=(".",),
            source_paths_exclude=(
                # Directories:
                "./extern",
                "./scripts/addons_contrib",
                "./scripts/templates_osl",
                "./tools",
                # Needs manual handling as it mixes two licenses.
                "./intern/atomic",
                # Practically an "extern" within an "intern" module, leave as-is.
                "./intern/itasc/kdl",
                # TODO: Files in these directories should be handled but the files have valid licenses.
                "./intern/libmv",

                # Files:
                # This file is generated by a configure script (no point in manually setting the license).
                "./build_files/build_environment/patches/config_gmpxx.h",
                # A modified `Apache-2.0` license.
                "./intern/opensubdiv/internal/evaluator/shaders/glsl_compute_kernel.glsl",
            ),
        ),
        Pass(
            filename_test=filename_is_cmake,
            source_paths_include=(".",),
            source_paths_exclude=(
                # Directories:
                # This is an exception, it has its own CMake files we do not maintain.
                "./extern/audaspace",
                "./extern/quadriflow/3rd/lemon-1.3.1",
            ),
        ),
        Pass(
            filename_test=filename_is_script_compat,
            source_paths_include=(".",),
            source_paths_exclude=(
                # Directories:
                # This is an exception, it has its own CMake files we do not maintain.
                "./extern",
                "./scripts/addons_contrib",
                # Just data.
                "./doc/python_api/examples",
                "./scripts/addons/presets",
                "./scripts/presets",
                "./scripts/templates_py",
            ),
        ),
    )

    for pass_data in passes:
        if USE_MULTIPROCESS:
            filepath_args = [
                filepath
                for dirpath in pass_data.source_paths_include
                for filepath in source_files(
                    dirpath,
                    pass_data.source_paths_exclude,
                    pass_data.filename_test,
                )
            ]
            import multiprocessing
            job_total = multiprocessing.cpu_count()
            pool = multiprocessing.Pool(processes=job_total * 2)
            pool.map(operation_wrap, filepath_args)
        else:
            # Serial execution: simpler to debug (see `USE_MULTIPROCESS`).
            for filepath in [
                filepath
                for dirpath in pass_data.source_paths_include
                for filepath in source_files(
                    dirpath,
                    pass_data.source_paths_exclude,
                    pass_data.filename_test,
                )
            ]:
                operation_wrap(filepath)

    if REPORT_UNIQUE_HEADER_MAPPING:
        # Print every unique (anonymized) header with the files that use it.
        print("#####################")
        print("Unique Header Listing")
        print("#####################")
        print("")
        for k, v in sorted(mapping.items()):
            print("=" * 79)
            print(k)
            print("-" * 79)
            v.sort()
            for filepath in v:
                print("-", filepath)
            print("")

    report_statistics()


if __name__ == "__main__":
    main()