Tools: add a utility to validate array sizes
The script check_source/static_check_size_comments.py can be run directly or called via the convenience target "make check_size_comments". Also add a utility module, `line_number_utils`, which implements a version of `re.finditer` that includes line numbers & ranges.
This commit is contained in:
88
tools/utils_maintenance/modules/line_number_utils.py
Normal file
88
tools/utils_maintenance/modules/line_number_utils.py
Normal file
@@ -0,0 +1,88 @@
|
||||
# SPDX-FileCopyrightText: 2025 Blender Authors
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
"""
|
||||
When writing text checking utilities, it's not always straightforward
|
||||
to find line numbers and ranges from an offset within the text.
|
||||
|
||||
This module provides helpers to efficiently do this.
|
||||
|
||||
The main utility is ``finditer_with_line_numbers_and_bounds``,
|
||||
an alternative to ``re.finditer`` which yields line numbers and offsets
|
||||
for the line bounds - useful for scanning files and reporting errors that include the line contents.
|
||||
"""
|
||||
|
||||
# Names exported by this module.
__all__ = (
    "finditer_newline_cache_compute",
    "finditer_with_line_numbers_and_bounds",
    "line_to_offset_range",
)
|
||||
|
||||
from collections.abc import (
|
||||
Iterator,
|
||||
)
|
||||
|
||||
import re as _re
|
||||
|
||||
|
||||
def finditer_newline_cache_compute(text: str) -> tuple[dict[int, int], list[int]]:
    """
    Compute newline lookup tables for ``text``.

    Return a tuple containing:

    - Offset to line lookup: a dictionary mapping the offset of every
      newline character to the number of the line that follows it
      (the first line of the text is line 0, so values start at 1).
    - Line to offset lookup: a list indexed by line number, holding 0
      for the first line and, for every following line, the offset of
      the newline character that precedes it.
    """
    # Offset to line lookup.
    offset_to_line_cache: dict[int, int] = {}
    # Line to offset lookup (the first line always begins at offset 0).
    line_to_offset_cache: list[int] = [0]

    # Enumerate from 1 because each newline marks the start of the *next* line.
    for i, m in enumerate(_re.finditer("\\n", text), 1):
        ofs = m.start()
        offset_to_line_cache[ofs] = i
        line_to_offset_cache.append(ofs)

    return offset_to_line_cache, line_to_offset_cache
|
||||
|
||||
|
||||
def finditer_with_line_numbers_and_bounds(
|
||||
pattern: str,
|
||||
text: str,
|
||||
*,
|
||||
offset_to_line_cache: dict[int, int] | None = None,
|
||||
flags: int = 0,
|
||||
) -> Iterator[tuple[_re.Match[str], int, tuple[int, int]]]:
|
||||
"""
|
||||
A version of ``re.finditer`` that returns ``(match, line_number, line_bounds)``.
|
||||
|
||||
Note that ``offset_to_line_cache`` is the first return value from
|
||||
``finditer_newline_cache_compute``.
|
||||
This should be passed in if the iterator is called multiple times
|
||||
on the same buffer, to avoid calculating this every time.
|
||||
"""
|
||||
if offset_to_line_cache is None:
|
||||
offset_to_line_cache, line_to_offset_cache = finditer_newline_cache_compute(text)
|
||||
del line_to_offset_cache
|
||||
|
||||
text_len = len(text)
|
||||
for m in _re.finditer(pattern, text, flags):
|
||||
|
||||
if (beg := text.rfind("\n", 0, m.start())) == -1:
|
||||
beg = 0
|
||||
line_number = 0
|
||||
else:
|
||||
line_number = offset_to_line_cache[beg]
|
||||
|
||||
if (end := text.find("\n", m.end(), text_len)) == -1:
|
||||
end = text_len
|
||||
|
||||
yield m, line_number, (beg, end)
|
||||
|
||||
|
||||
def line_to_offset_range(line: int, offset_limit: int, line_to_offset_cache: list[int]) -> tuple[int, int]:
    """
    Given a line index, return the ``(beg, end)`` offsets that bound it.

    - ``line`` is a non-negative index into ``line_to_offset_cache``.
    - ``offset_limit`` is the value used for ``end`` when ``line``
      is the last entry in ``line_to_offset_cache``.
    - ``line_to_offset_cache`` is a list of offsets indexed by line.

    ``beg`` is the entry for ``line``, ``end`` is the entry for the line after it.
    """
    assert line >= 0
    beg = line_to_offset_cache[line]
    # The last line has no following entry, fall back to the caller supplied limit.
    end = line_to_offset_cache[line + 1] if (line + 1 < len(line_to_offset_cache)) else offset_limit
    return beg, end
|
||||
Reference in New Issue
Block a user