89 lines
2.6 KiB
Python
89 lines
2.6 KiB
Python
|
|
# SPDX-FileCopyrightText: 2025 Blender Authors
|
||
|
|
#
|
||
|
|
# SPDX-License-Identifier: GPL-2.0-or-later
|
||
|
|
|
||
|
|
"""
|
||
|
|
When writing text checking utilities, it's not always straightforward
|
||
|
|
to find line numbers and ranges from an offset within the text.
|
||
|
|
|
||
|
|
This module provides helpers to efficiently do this.
|
||
|
|
|
||
|
|
The main utility is ``finditer_with_line_numbers_and_bounds``,
|
||
|
|
an alternative to ``re.finditer`` which yields line numbers and offsets
|
||
|
|
for the line bounds - useful for scanning files and reporting errors that include the line contents.
|
||
|
|
"""
|
||
|
|
|
||
|
|
__all__ = (
|
||
|
|
"finditer_newline_cache_compute",
|
||
|
|
"finditer_with_line_numbers_and_bounds",
|
||
|
|
"line_to_offset_range",
|
||
|
|
)
|
||
|
|
|
||
|
|
from collections.abc import (
|
||
|
|
Iterator,
|
||
|
|
)
|
||
|
|
|
||
|
|
import re as _re
|
||
|
|
|
||
|
|
|
||
|
|
def finditer_newline_cache_compute(text: str) -> tuple[dict[int, int], list[int]]:
    """
    Compute the newline lookup tables for ``text``.

    Return a tuple containing:

    - Offset to line lookup: a dictionary mapping the offset of every newline
      character to the (zero based) index of the line which follows it.
    - Line to offset lookup: a list indexed by line, the first item is always 0,
      every following item is the offset of the newline which precedes that line.
    """
    # Offsets of every newline character, in the order they occur in the text.
    newline_offsets = [m.start() for m in _re.finditer("\\n", text)]

    # The first newline terminates line 0, so the line following it is line 1.
    offset_to_line = {ofs: line for line, ofs in enumerate(newline_offsets, 1)}
    # Line 0 has no preceding newline, it begins at the start of the text.
    line_to_offset = [0, *newline_offsets]

    return offset_to_line, line_to_offset
|
||
|
|
|
||
|
|
|
||
|
|
def finditer_with_line_numbers_and_bounds(
|
||
|
|
pattern: str,
|
||
|
|
text: str,
|
||
|
|
*,
|
||
|
|
offset_to_line_cache: dict[int, int] | None = None,
|
||
|
|
flags: int = 0,
|
||
|
|
) -> Iterator[tuple[_re.Match[str], int, tuple[int, int]]]:
|
||
|
|
"""
|
||
|
|
A version of ``re.finditer`` that returns ``(match, line_number, line_bounds)``.
|
||
|
|
|
||
|
|
Note that ``offset_to_line_cache`` is the first return value from
|
||
|
|
``finditer_newline_cache_compute``.
|
||
|
|
This should be passed in if the iterator is called multiple times
|
||
|
|
on the same buffer, to avoid calculating this every time.
|
||
|
|
"""
|
||
|
|
if offset_to_line_cache is None:
|
||
|
|
offset_to_line_cache, line_to_offset_cache = finditer_newline_cache_compute(text)
|
||
|
|
del line_to_offset_cache
|
||
|
|
|
||
|
|
text_len = len(text)
|
||
|
|
for m in _re.finditer(pattern, text, flags):
|
||
|
|
|
||
|
|
if (beg := text.rfind("\n", 0, m.start())) == -1:
|
||
|
|
beg = 0
|
||
|
|
line_number = 0
|
||
|
|
else:
|
||
|
|
line_number = offset_to_line_cache[beg]
|
||
|
|
|
||
|
|
if (end := text.find("\n", m.end(), text_len)) == -1:
|
||
|
|
end = text_len
|
||
|
|
|
||
|
|
yield m, line_number, (beg, end)
|
||
|
|
|
||
|
|
|
||
|
|
def line_to_offset_range(line: int, offset_limit: int, line_to_offset_cache: list[int]) -> tuple[int, int]:
    """
    Given a (zero based) line index, return the ``(begin, end)`` offsets of that line.

    - ``line_to_offset_cache`` is the second return value from
      ``finditer_newline_cache_compute``.
    - ``offset_limit`` is used as the end offset when ``line`` is the last entry
      in the cache (typically the length of the text).

    An ``IndexError`` is raised when ``line`` is past the end of the cache.
    """
    assert line >= 0
    line_next = line + 1
    if line_next < len(line_to_offset_cache):
        # The line ends where the following line's entry begins.
        return line_to_offset_cache[line], line_to_offset_cache[line_next]
    # Last line: there is no following entry, fall back to the limit.
    return line_to_offset_cache[line], offset_limit
|