make license: back port changes from main

Back port fix & changes from main to keep licenses compatible for all
releases:

- 675aa86ca6
- 9f0043b8d6
- 81025eabd2
This commit is contained in:
Campbell Barton
2024-10-21 20:56:45 +11:00
parent d1857d1d58
commit 9df897a1d8

View File

@@ -1,25 +1,36 @@
# SPDX-FileCopyrightText: 2024 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
# pylint: disable=wrong-import-position, missing-function-docstring, missing-module-docstring, missing-class-docstring
# pylint: disable=missing-function-docstring, missing-module-docstring, missing-class-docstring
from dataclasses import dataclass
from pathlib import Path
import itertools
import json
import os
import re
import sys
from pathlib import Path
from typing import (
Iterator,
NamedTuple,
)
# -----------------------------------------------------------------------------
# Path Constants
ROOT_DIR = Path(os.path.abspath(Path(os.path.dirname(__file__)) / "../../"))
DIRPATH_LICENSES: Path = ROOT_DIR / "release/license/"
DIRPATH_EXTERN_LIBRARIES: Path = ROOT_DIR / "extern"
FILEPATH_VERSIONS_CMAKE: Path = ROOT_DIR / "build_files/build_environment/cmake/versions.cmake"
FILEPATH_LICENSES_INDEX: Path = DIRPATH_LICENSES / "licenses.json" # List of licenses and definitions.
FILEPATH_LICENSE_GENERATED: Path = DIRPATH_LICENSES / "license.md" # Generated licenses file.
CURRENT_DIR = Path(os.path.dirname(__file__))
ROOT = Path(os.path.abspath(CURRENT_DIR / "../../"))
@dataclass
class Filepaths:
licenses_folder: Path = ROOT / "release/license/"
versions_cmake: Path = ROOT / "build_files/build_environment/cmake/versions.cmake"
extern_libraries: Path = ROOT / "extern"
licenses_index: Path = licenses_folder / "licenses.json" # List of licenses and definitions.
license_generated: Path = licenses_folder / "license.md" # Generated licenses file.
# -----------------------------------------------------------------------------
# Constants
INTRODUCTION = r"""<!--
@@ -62,41 +73,48 @@ They work together as a stack.
"""
from collections import defaultdict
from typing import Generator, Union
import json
import re
import sys
NO_LICENSE = "No License Set"
LICENSES_NOT_NEEDED = {
'',
"",
}
LicenseDict = defaultdict[str, defaultdict[str, dict[str, str]]]
# -----------------------------------------------------------------------------
# Types
# Raw data extracted either from:
# - `README.blender` files.
# - `./build_files/build_environment/cmake/versions.cmake`.
class LibraryRaw(NamedTuple):
    """
    Raw, un-grouped description of a single library.

    One instance is produced per library found either in a `README.blender`
    file or in `./build_files/build_environment/cmake/versions.cmake`.
    All fields are plain strings, an empty string means "not specified".
    """
    # Human readable library name.
    name: str
    # Project URL, used to turn the name into a link.
    homepage: str
    # Version string as written in the source it was read from.
    version: str
    # Main license identifier, without any "WITH <exception>" suffix.
    license: str
    # License exception identifier (the part after "WITH"), if any.
    exception: str
    # Copyright notice text.
    copyright: str
def int_to_superscript(num: int) -> str:
    """
    Return `num` written with Unicode superscript digits.

    For example `12` becomes `"¹²"`. A leading minus sign is mapped to the
    superscript minus so negative numbers do not raise an error.
    """
    # Mapping of regular digits (and the minus sign) to superscript Unicode characters.
    superscript_table = str.maketrans("0123456789-", "⁰¹²³⁴⁵⁶⁷⁸⁹⁻")

    # Convert the integer to a string and map each character to its superscript equivalent.
    return str(num).translate(superscript_table)
class Library:
__slots__ = ('name', 'version', 'homepage', 'library_copyright', 'exception')
__slots__ = ("name", "version", "homepage", "library_copyright", "exception")
name: str
version: str
@@ -120,40 +138,40 @@ class Library:
self.library_copyright = library_copyright
self.exception = exception
def __lt__(self, other: 'Library') -> bool:
def __lt__(self, other: "Library") -> bool:
return self.name.lower() < other.name.lower()
def check_missing_copyright(self, library_license: 'License') -> None:
def check_missing_copyright(self, library_license: "License") -> None:
"""Some licenses require a copyright notice"""
if self.library_copyright or library_license.copyright_exemption:
return
print(f'Warning: "{self.name}" missing copyright notice '
f'(required for {library_license.identifier}).')
print(f"Warning: \"{self.name}\" missing copyright notice "
f"(required for {library_license.identifier}).")
def dump(self, library_license: 'License') -> str:
def dump(self, library_license: "License") -> str:
self.check_missing_copyright(library_license)
library_copyright = f'`{self.library_copyright}`' if self.library_copyright else "-"
name = f'[{self.name}]({self.homepage})' if self.homepage else self.name
version = self.version[:11] if self.version else '-'
library_copyright = f"`{self.library_copyright}`" if self.library_copyright else "-"
name = f"[{self.name}]({self.homepage})" if self.homepage else self.name
version = self.version[:11] if self.version else "-"
# Add exception indicator in the name.
name += library_license.get_exception_suffix(self.exception)
raw_data = (
f'| {name} '
f'| {version} '
f"| {name} "
f"| {version} "
)
if not library_license.copyright_exemption:
raw_data += f'| {library_copyright} '
raw_data += f"| {library_copyright} "
raw_data += '|\n'
raw_data += "|\n"
return raw_data
class License:
__slots__ = ('identifier', 'name', 'url', 'copyright_exemption', 'libraries', 'exceptions')
__slots__ = ("identifier", "name", "url", "copyright_exemption", "libraries", "exceptions")
identifier: str
name: str
@@ -162,21 +180,28 @@ class License:
libraries: list[Library]
exceptions: list[str]
def __init__(self, identifier: str, **kwargs: str) -> None:
def __init__(
self,
*,
identifier: str,
name: str,
url: str,
copyright_exemption: str = "",
):
self.identifier = identifier
self.name = kwargs["name"]
self.url = kwargs["url"]
self.name = name
self.url = url
# By default we assume that all the licenses require copyright.
self.copyright_exemption = kwargs.get("copyright_exemption", "")
self.copyright_exemption = copyright_exemption
self.libraries = []
self.exceptions = []
@property
def filepath(self) -> str:
if self.identifier.startswith("SPDX"):
filepath = os.path.join(Filepaths.licenses_folder, "spdx", f"{self.identifier[5:]}.txt")
filepath = os.path.join(DIRPATH_LICENSES, "spdx", f"{self.identifier[5:]}.txt")
else:
filepath = os.path.join(Filepaths.licenses_folder, "others", f"{self.identifier}.txt")
filepath = os.path.join(DIRPATH_LICENSES, "others", f"{self.identifier}.txt")
return filepath
def get_exception_suffix(self, exception: str) -> str:
@@ -201,24 +226,35 @@ class License:
if self.copyright_exemption:
return ""
print(f'Error: Could not find license file for {self.identifier}: "{self.filepath}"')
print(f"Error: Could not find license file for {self.identifier}: \"{self.filepath}\"")
sys.exit(1)
with open(self.filepath, 'r', encoding="utf8") as fh:
with open(self.filepath, "r", encoding="utf8") as fh:
license_raw = fh.read()
# Strip trailing space as this has a special meaning for mark-down,
# avoid editing the original texts as any edits may be overwritten
# when updating the licenses.
#
# This also removes page breaks "\x0C" or ^L.
# These could be replaced with something similar in markdown,
# unless this has some benefit, leave as-is.
license_raw = "\n".join(line.rstrip() for line in license_raw.split("\n"))
# Debug option commented out.
# This is useful if you want a human-inspectable document without the licenses.
# license_raw = "# Debug"
summary_prefix = f'{int_to_superscript(index)} ' if index else ''
library_license = f'<details>\n<summary>{summary_prefix}{self.name}</summary>\n' \
f'\n{license_raw}\n' \
summary_prefix = f"{int_to_superscript(index)} " if index else ""
library_license = (
f"<details>\n<summary>{summary_prefix}{self.name}</summary>\n"
f"\n{license_raw}\n"
"</details>"
)
return library_license
def __lt__(self, other: 'License') -> bool:
def __lt__(self, other: "License") -> bool:
return self.name.lower() < other.name.lower()
def __repr__(self) -> str:
@@ -233,33 +269,24 @@ class License:
return json.dumps(as_dict, indent=2)
# -----------------------------------------------------------------------------
# Internal Logic

def initialize_licenses() -> dict[str, License]:
    """
    Load the license index and return one `License` per entry.

    The index is the JSON file at `FILEPATH_LICENSES_INDEX`, each top-level key
    is used as the license identifier and its value is passed on as
    keyword arguments to `License`.
    No libraries are attached to the licenses at this point.
    """
    with open(FILEPATH_LICENSES_INDEX, "r", encoding="utf8") as fh:
        licenses_json = json.load(fh)

    return {key: License(identifier=key, **values) for key, values in licenses_json.items()}
def add_libraries_to_licenses(
licenses_data: LicenseDict,
libraries_raw: dict[str, dict[str, str]]) -> None:
for _library, details in libraries_raw.items():
license_name = details.get("license") or NO_LICENSE
name = details["name"]
licenses_data[license_name][name] = details
def get_license_exception(library_license: str) -> tuple[str, str]:
"""Split license into main license and exception
Example of acceptable license: "SPDX:Apache-2.0 WITH LLVM-exception"
This would output: ("SPDX:Apache-2.0", "LLVM-exception")
"""
# Use re.IGNORECASE to match "with" in any case (e.g., "with" or "WITH").
# Use `re.IGNORECASE` to match "with" in any case (e.g., "with" or "WITH").
re_match = re.match(r"^(.*)\swith\s(.+)$", library_license, re.IGNORECASE)
if re_match:
return re_match.group(1).strip(), re_match.group(2).strip()
@@ -268,125 +295,135 @@ def get_license_exception(library_license: str) -> tuple[str, str]:
def flatten_cmake_file(content: str) -> str:
    """
    Resolve all the ${VARIABLES} in CMake.

    Every `set(VAR_NAME VALUE)` found in `content` defines a variable,
    each `${VAR_NAME}` reference is then replaced by that value.

    The values are collected once, before any substitution,
    so this is a single pass: a reference to a variable whose own value
    contains another reference is only expanded when the inner variable
    happens to be substituted first.
    """
    # Find all variable definitions of the form `set(VAR_NAME VALUE)`.
    variables = dict(re.findall(r"set\((\w+)\s+([^\)]+)\)", content))

    # Replace all occurrences of ${VAR_NAME} with the corresponding value.
    # Use a literal replacement (not `re.sub`) so a back-slash in the value
    # is never interpreted as an escape sequence or group reference.
    for var, value in variables.items():
        content = content.replace("${" + var + "}", value)

    return content
def process_versions_cmake(licenses_data: LicenseDict) -> None:
def process_versions_cmake() -> Iterator[LibraryRaw]:
"""
Parse versions.cmake
Return a dictionary grouped by license.
"""
# pylint: disable=too-many-locals
libraries_raw = {}
with open(Filepaths.versions_cmake, 'r', encoding="utf8") as versions_cmake:
data = versions_cmake.read()
data = flatten_cmake_file(data)
for re_match in re.finditer(r'^set\((\w+)\s+', data, re.MULTILINE):
# Use regex to capture the key from each set() statement.
# Extract the value from the remainder.
key = re_match.group(1)
value_start = re_match.end()
value_eol = data.find("\n", value_start)
assert value_eol != -1
value_line = data[value_start: value_eol].rstrip()
# NOTE(@ideasman42): basic & imperfect variable extractions.
# It can be fairly easily tripped up by expressions such as:
# - `set(VAR "VALUE ) # ")`
# - Uppercase or additional spaces e.g. `SET (...)`.
# - Or `set()` expressions inside a multi-line string.
#
# Any effort to rewrite this logic would be better spent running the file through CMake it's self,
# appending logic to dump all variables using `string(JSON ...)` which Python can then read reliably.
# Strip any comments at the line end.
# set(FOO BAR) # BAZ.
if (re_match_comment := next(re.finditer(r"\)\s*#", value_line), None)):
value_line = value_line[:re_match_comment.start() + 1]
libraries_raw: dict[str, dict[str, str]] = {}
with open(FILEPATH_VERSIONS_CMAKE, "r", encoding="utf8") as fh:
data = fh.read()
# Extract the value by checking this line and detecting single or multi-line text.
if value_line.endswith(")"):
# Single line variable.
value = value_line[:-1].strip()
elif value_line.endswith("[=["):
# Calculate the bounds of the multi-line string.
value_ml_start = value_start + len(value_line)
value_ml_end = data.find("]=]", value_ml_start)
assert value_ml_end != -1
value = data[value_ml_start:value_ml_end].strip().replace("\n", " ")
else:
# Could not detect a single line value OR a multi-line value.
print(f"Error: Unable to parse {key!r}, line {value_line!r}, "
"expected an \")\" ending or beginning of a multi-line string \"[=[\"")
sys.exit(1)
data = flatten_cmake_file(data)
for re_match in re.finditer(r"^set\((\w+)\s+", data, re.MULTILINE):
# Use regex to capture the key from each set() statement.
# Extract the value from the remainder.
key = re_match.group(1)
value_start = re_match.end()
value_eol = data.find("\n", value_start)
assert value_eol != -1
value_line = data[value_start: value_eol].rstrip()
# Determine the library name from the prefix (minus the suffix).
library_name = '_'.join(key.split('_')[:-1])
# Strip any comments at the line end.
# `set(FOO BAR) # BAZ`.
if (re_match_comment := next(re.finditer(r"\)\s*#", value_line), None)):
value_line = value_line[:re_match_comment.start() + 1]
# Initialize the library entry if it doesn't exist.
if library_name not in libraries_raw:
libraries_raw[library_name] = {
"name": library_name.replace("_", " ").title(),
"homepage": "",
"version": "",
"license": "",
"exception": "",
"copyright": "",
"hash": "",
"build_time_only": "",
}
# Extract the value by checking this line and detecting single or multi-line text.
if value_line.endswith(")"):
# Single line variable.
value = value_line[:-1].strip()
elif value_line.endswith("[=["):
# Calculate the bounds of the multi-line string.
value_ml_start = value_start + len(value_line)
value_ml_end = data.find("]=]", value_ml_start)
assert value_ml_end != -1
value = data[value_ml_start:value_ml_end].strip().replace("\n", " ")
else:
# Could not detect a single line value OR a multi-line value.
print(f"Error: Unable to parse {key!r}, line {value_line!r}, "
"expected an \")\" ending or beginning of a multi-line string \"[=[\"")
sys.exit(1)
# Populate the relevant fields based on the key.
end_word = key.split('_')[-1]
match end_word:
case 'NAME':
libraries_raw[library_name]["name"] = value.strip('"')
case 'HOMEPAGE':
libraries_raw[library_name]["homepage"] = value.strip('"')
case 'VERSION':
libraries_raw[library_name]["version"] = value.strip('"')
case 'LICENSE':
library_license, exception = get_license_exception(value)
libraries_raw[library_name]["license"] = library_license
libraries_raw[library_name]["exception"] = exception
case 'COPYRIGHT':
libraries_raw[library_name]["copyright"] = value.strip('"')
case 'HASH':
libraries_raw[library_name]["hash"] = value
case 'DEPSBUILDTIMEONLY':
# Use only strings to simplify the typechecking.
libraries_raw[library_name]["build_time_only"] = "True"
# Determine the library name from the prefix (minus the suffix).
library_name, end_word = key.rpartition("_")[0::2]
if not library_name:
# No suffix to check, it can be skipped.
continue
# Initialize the library entry if it doesn't exist.
if (library_vars := libraries_raw.get(library_name)) is None:
library_vars = libraries_raw[library_name] = {
"name": library_name.replace("_", " ").title(),
"homepage": "",
"version": "",
"license": "",
"exception": "",
"copyright": "",
# Exclude from `LibraryRaw`.
"_hash": "",
"_build_time_only": "",
}
# Populate the relevant fields based on the key.
match end_word:
case "NAME":
library_vars["name"] = value.strip('"')
case "HOMEPAGE":
library_vars["homepage"] = value.strip('"')
case "VERSION":
library_vars["version"] = value.strip('"')
case "LICENSE":
library_license, exception = get_license_exception(value)
library_vars["license"] = library_license
library_vars["exception"] = exception
case "COPYRIGHT":
library_vars["copyright"] = value.strip('"')
case "HASH":
library_vars["_hash"] = value
case "DEPSBUILDTIMEONLY":
# Use only strings to simplify the type-checking.
library_vars["_build_time_only"] = "True"
# If there is no hash we assume it is not a real library but some other information on the file.
# Also remove any library which is only used during build time and have no
# artifact included in the final Blender binary.
libraries_clean = {
key: {k: v for k, v in data.items() if k not in {"build_time_only", "hash"}}
for key, data in libraries_raw.items()
if data["hash"] and not data["build_time_only"]
}
add_libraries_to_licenses(licenses_data, libraries_clean)
for key, lib_info_args in libraries_raw.items():
if not (lib_info_args["_hash"] and not lib_info_args["_build_time_only"]):
continue
yield LibraryRaw(**{k: v for k, v in lib_info_args.items() if not k.startswith("_")})
def iterate_readme_files(base_dir: Path) -> Iterator[str]:
    """
    Yield the text of every `README.blender` directly below `base_dir`.

    Only the immediate sub-directories are inspected (no recursion),
    entries which are not directories are ignored.
    A warning is printed for each sub-directory without a `README.blender`.
    Sub-directories are visited in sorted order so the result
    does not depend on the file-system.
    """
    base_path = Path(base_dir)

    # Iterate over all subdirectories.
    for subdir in sorted(base_path.iterdir()):
        if not subdir.is_dir():
            continue

        readme_path = subdir / "README.blender"
        if not readme_path.exists():
            print(f"Warning: Missing file \"{readme_path}\"")
            continue

        # Read the whole file before yielding so no file handle
        # is kept open while the caller processes the contents.
        yield readme_path.read_text(encoding="utf8")
def process_readme_blender(licenses_data: LicenseDict) -> None:
def process_readme_blender() -> Iterator[LibraryRaw]:
"""Handle the README.blender files"""
keys = {
"Project": "name",
@@ -396,13 +433,9 @@ def process_readme_blender(licenses_data: LicenseDict) -> None:
"Copyright": "copyright"
}
libraries_raw = {}
for readme in iterate_readme_files(Filepaths.extern_libraries):
for readme in iterate_readme_files(DIRPATH_EXTERN_LIBRARIES):
lines = readme.strip().split("\n")
# Initialize an empty dictionary to store the project data.
project_data = {}
# Temporary storage for project fields.
project_fields = {}
@@ -427,52 +460,64 @@ def process_readme_blender(licenses_data: LicenseDict) -> None:
# Split the license into license and its (optional) extension.
library_license, exception = get_license_exception(project_fields.get("license", ""))
project_data = {
"name": project_name,
"version": project_fields.get("version", ""),
"homepage": project_fields.get("homepage", ""),
"license": library_license,
"exception": exception,
"copyright": project_fields.get("copyright", "")
}
libraries_raw[project_name] = project_data
add_libraries_to_licenses(licenses_data, libraries_raw)
yield LibraryRaw(
name=project_name,
version=project_fields.get("version", ""),
homepage=project_fields.get("homepage", ""),
license=library_license,
exception=exception,
copyright=project_fields.get("copyright", ""),
)
def fetch_libraries_licenses() -> dict[str, License]:
    """
    Return all known licenses, populated with the libraries that use them.

    Libraries are collected from `versions.cmake` and the `README.blender`
    files, then attached to the matching `License`.
    Libraries without a license are listed in a warning, a license which is
    missing from the licenses index is reported as an error;
    in both cases those libraries are not attached to any license.
    """
    licenses = initialize_licenses()

    # Intermediate storage.
    # Map the license name to all libraries that use it.
    # Keys may be: `SPDX:GPL-2.0-or-later`, `SPDX:MIT`, ... `ICS` etc.
    licenses_data: dict[str, list[LibraryRaw]] = {}

    for lib_info in itertools.chain(
            # Get data from `./build_files/build_environment/cmake/versions.cmake`.
            process_versions_cmake(),
            # Get data from `README.blender` files.
            process_readme_blender(),
    ):
        license_name = lib_info.license or NO_LICENSE
        licenses_data.setdefault(license_name, []).append(lib_info)

    # Populate licenses with the corresponding libraries.
    for license_key, libraries_data in licenses_data.items():
        if license_key == NO_LICENSE:
            print("Warning: The following libraries have no license:")
            for lib_info in libraries_data:
                print(f" * {lib_info.name}")
            continue

        if license_key in LICENSES_NOT_NEEDED:
            # Do nothing about these licenses.
            continue

        if (license_obj := licenses.get(license_key)) is None:
            # The license is used by a library but not defined in the index, report it.
            print(f"Error: {license_key} license not found in: \"{FILEPATH_LICENSES_INDEX}\"")
            continue

        license_obj.libraries.extend(
            Library(
                name=lib_info.name,
                version=lib_info.version,
                homepage=lib_info.homepage,
                library_copyright=lib_info.copyright,
                exception=lib_info.exception,
            )
            for lib_info in libraries_data
        )

    return licenses
def extract_licenses(text: str) -> set[str]:
@@ -488,10 +533,10 @@ def extract_licenses(text: str) -> set[str]:
{"SPDX:GPL-3.0-or-later", "Arev-Fonts"}
"""
# Remove multi-line comments (<!-- ... -->).
text = re.sub(r'<!--.*?-->', '', text, flags=re.DOTALL)
text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
# Find all licenses in < >.
license_pattern = r'<([^<|>]+?)>'
license_pattern = r"<([^<|>]+?)>"
# Find all matches.
matches = re.findall(license_pattern, text)
@@ -508,27 +553,27 @@ def get_introduction(licenses: dict[str, License]) -> str:
for license_lookup in license_lookups:
if license_lookup not in licenses:
print(f'Error: {license_lookup} license not found in: "{Filepaths.licenses_index}"')
print(f"Error: {license_lookup} license not found in: \"{FILEPATH_LICENSES_INDEX}\"")
continue
license_item = licenses[license_lookup]
introduction = introduction.replace(
f'<{license_lookup}>',
f"<{license_lookup}>",
license_item.dump()
)
introduction = introduction.replace(
f'<{license_lookup}|link>',
f'[{license_item.name}]({license_item.url})'
f"<{license_lookup}|link>",
f"[{license_item.name}]({license_item.url})"
)
return introduction
def generate_license_file(licenses: dict[str, License]) -> None:
filepath = Filepaths.license_generated
filepath = FILEPATH_LICENSE_GENERATED
with open(filepath, 'w', encoding='utf8') as fh:
with open(filepath, "w", encoding="utf8") as fh:
fh.write(get_introduction(licenses))
for license_item in sorted(licenses.values()):
@@ -536,12 +581,12 @@ def generate_license_file(licenses: dict[str, License]) -> None:
continue
if license_item.url:
fh.write(f'\n\n## [{license_item.name}]({license_item.url})\n\n')
fh.write(f"\n\n## [{license_item.name}]({license_item.url})\n\n")
else:
fh.write(f'\n\n## {license_item.name}\n\n')
fh.write(f"\n\n## {license_item.name}\n\n")
if license_item.copyright_exemption:
fh.write(f'{license_item.copyright_exemption}\n\n')
fh.write(f"{license_item.copyright_exemption}\n\n")
fh.write("| Library | Version |\n")
fh.write("| ------- | ------- |\n")
else:
@@ -556,22 +601,22 @@ def generate_license_file(licenses: dict[str, License]) -> None:
exception_license = licenses.get(exception)
if exception_license is None:
print(
f'Error: {exception} extension license not found in: '
f'{Filepaths.licenses_index}"'
)
print(f"Error: {exception} extension license not found in: \"{FILEPATH_LICENSES_INDEX}\"")
continue
fh.write(exception_license.dump(i + 1))
fh.write("\n")
print(f'\nLicense file successfully generated: "{filepath}"')
print(f"\nLicense file successfully generated: \"{filepath}\"")
print("Remember to commit the file to the Blender repository.\n")
# -----------------------------------------------------------------------------
# Main Function

def main() -> None:
    """Collect the licenses of all libraries and write the generated license file."""
    licenses = fetch_libraries_licenses()
    generate_license_file(licenses)