Files
test2/tools/utils_maintenance/modules/line_number_utils.py
Campbell Barton 0265b13399 Tools: add a utility to validate array sizes
The script check_source/static_check_size_comments.py can be run directly
or called via the convenience target "make check_size_comments".

Add a utility module: `line_number_utils` which implements
a version of `re.finditer` that includes line numbers & ranges.
2025-05-23 14:04:26 +10:00

89 lines
2.6 KiB
Python

# SPDX-FileCopyrightText: 2025 Blender Authors
#
# SPDX-License-Identifier: GPL-2.0-or-later
"""
When writing text checking utilities, it's not always straightforward
to find line numbers and ranges from an offset within the text.
This module provides helpers to efficiently do this.
The main utility is ``finditer_with_line_numbers_and_bounds``,
an alternative to ``re.finditer`` which yields line numbers and offsets
for the line bounds - useful for scanning files and reporting errors that include the line contents.
"""
__all__ = (
"finditer_newline_cache_compute",
"finditer_with_line_numbers_and_bounds",
"line_to_offset_range",
)
from collections.abc import (
Iterator,
)
import re as _re
def finditer_newline_cache_compute(text: str) -> tuple[dict[int, int], list[int]]:
    """
    Scan ``text`` for newlines and build lookup tables for line numbers.

    Return a tuple containing:

    - Offset to line lookup: maps the offset of every newline character
      to the zero-based number of the line that begins after it.
    - Line to offset lookup: a list indexed by zero-based line number,
      the first entry is always ``0``, every following entry is the offset
      of the newline character that precedes that line.
    """
    # Offsets of every newline character, in the order they occur.
    newline_offsets = [m.start() for m in _re.finditer("\\n", text)]

    # Line numbers start at 1 here because line 0 has no preceding newline.
    offset_to_line_cache: dict[int, int] = {
        ofs: line for line, ofs in enumerate(newline_offsets, 1)
    }
    # The first line always starts at offset zero.
    line_to_offset_cache: list[int] = [0, *newline_offsets]

    return offset_to_line_cache, line_to_offset_cache
def finditer_with_line_numbers_and_bounds(
pattern: str,
text: str,
*,
offset_to_line_cache: dict[int, int] | None = None,
flags: int = 0,
) -> Iterator[tuple[_re.Match[str], int, tuple[int, int]]]:
"""
A version of ``re.finditer`` that returns ``(match, line_number, line_bounds)``.
Note that ``offset_to_line_cache`` is the first return value from
``finditer_newline_cache_compute``.
This should be passed in if the iterator is called multiple times
on the same buffer, to avoid calculating this every time.
"""
if offset_to_line_cache is None:
offset_to_line_cache, line_to_offset_cache = finditer_newline_cache_compute(text)
del line_to_offset_cache
text_len = len(text)
for m in _re.finditer(pattern, text, flags):
if (beg := text.rfind("\n", 0, m.start())) == -1:
beg = 0
line_number = 0
else:
line_number = offset_to_line_cache[beg]
if (end := text.find("\n", m.end(), text_len)) == -1:
end = text_len
yield m, line_number, (beg, end)
def line_to_offset_range(line: int, offset_limit: int, line_to_offset_cache: list[int]) -> tuple[int, int]:
    """
    Given a zero-based line number, return the ``(beg, end)`` offsets bounding that line.

    ``line_to_offset_cache`` is indexed by line number,
    ``offset_limit`` is used as the end offset for the last line in the cache.
    """
    assert line >= 0

    line_beg = line_to_offset_cache[line]

    # The line ends where the next one begins, the last line ends at the limit.
    line_next = line + 1
    if line_next < len(line_to_offset_cache):
        line_end = line_to_offset_cache[line_next]
    else:
        line_end = offset_limit

    return line_beg, line_end