BLI_string: prevent buffer overflow for BLI_str_utf8_offset_* functions
Strings containing Latin1-encoded bytes or corrupt UTF8 byte sequences
could cause reads past the buffer bounds (stepping over the null terminator).
Resolve by passing in the string length.
Other changes to support non-UTF8 byte sequences:
- BLI_str_utf8_offset_{to/from}_index were accumulating
the UTF8 offset without accounting for non-UTF8 characters,
which could cause a buffer underflow or an infinite loop.
- BLI_str_utf8_offset_to_index would read past the buffer bounds if the
offset passed in was in the middle of a UTF8 byte sequence.
This commit is contained in:
@@ -776,9 +776,9 @@ void txt_move_up(Text *text, const bool sel)
|
||||
}
|
||||
|
||||
if ((*linep)->prev) {
|
||||
int column = BLI_str_utf8_offset_to_column((*linep)->line, *charp);
|
||||
int column = BLI_str_utf8_offset_to_column((*linep)->line, (*linep)->len, *charp);
|
||||
*linep = (*linep)->prev;
|
||||
*charp = BLI_str_utf8_offset_from_column((*linep)->line, column);
|
||||
*charp = BLI_str_utf8_offset_from_column((*linep)->line, (*linep)->len, column);
|
||||
}
|
||||
else {
|
||||
txt_move_bol(text, sel);
|
||||
@@ -806,9 +806,9 @@ void txt_move_down(Text *text, const bool sel)
|
||||
}
|
||||
|
||||
if ((*linep)->next) {
|
||||
int column = BLI_str_utf8_offset_to_column((*linep)->line, *charp);
|
||||
int column = BLI_str_utf8_offset_to_column((*linep)->line, (*linep)->len, *charp);
|
||||
*linep = (*linep)->next;
|
||||
*charp = BLI_str_utf8_offset_from_column((*linep)->line, column);
|
||||
*charp = BLI_str_utf8_offset_from_column((*linep)->line, (*linep)->len, column);
|
||||
}
|
||||
else {
|
||||
txt_move_eol(text, sel);
|
||||
@@ -1320,9 +1320,9 @@ void txt_sel_set(Text *text, int startl, int startc, int endl, int endc)
|
||||
CLAMP(endc, 0, tollen);
|
||||
|
||||
text->curl = froml;
|
||||
text->curc = BLI_str_utf8_offset_from_index(froml->line, startc);
|
||||
text->curc = BLI_str_utf8_offset_from_index(froml->line, froml->len, startc);
|
||||
text->sell = tol;
|
||||
text->selc = BLI_str_utf8_offset_from_index(tol->line, endc);
|
||||
text->selc = BLI_str_utf8_offset_from_index(tol->line, tol->len, endc);
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
@@ -211,14 +211,18 @@ size_t BLI_str_partition_ex_utf8(const char *str,
|
||||
const char **r_suf,
|
||||
bool from_right) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1, 3, 4, 5);
|
||||
|
||||
int BLI_str_utf8_offset_to_index(const char *str, int offset) ATTR_WARN_UNUSED_RESULT
|
||||
ATTR_NONNULL(1);
|
||||
int BLI_str_utf8_offset_from_index(const char *str, int index) ATTR_WARN_UNUSED_RESULT
|
||||
ATTR_NONNULL(1);
|
||||
int BLI_str_utf8_offset_to_column(const char *str, int offset) ATTR_WARN_UNUSED_RESULT
|
||||
ATTR_NONNULL(1);
|
||||
int BLI_str_utf8_offset_from_column(const char *str, int column) ATTR_WARN_UNUSED_RESULT
|
||||
ATTR_NONNULL(1);
|
||||
int BLI_str_utf8_offset_to_index(const char *str,
|
||||
size_t str_len,
|
||||
int offset_target) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
|
||||
int BLI_str_utf8_offset_from_index(const char *str,
|
||||
size_t str_len,
|
||||
int index_target) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
|
||||
int BLI_str_utf8_offset_to_column(const char *str,
|
||||
size_t str_len,
|
||||
int offset_target) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
|
||||
int BLI_str_utf8_offset_from_column(const char *str,
|
||||
size_t str_len,
|
||||
int column_target) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
|
||||
|
||||
/** Size in bytes. */
|
||||
#define BLI_UTF8_MAX 6
|
||||
|
||||
@@ -1016,50 +1016,72 @@ size_t BLI_str_partition_ex_utf8(const char *str,
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/** \name Offset Conversion in Strings
|
||||
*
|
||||
* \note Regarding the assertion: `BLI_assert(offset <= offset_target)`
|
||||
* The `offset_target` is likely in the middle of a UTF8 byte-sequence.
|
||||
* Most likely the offset passed in is incorrect, although it may be impractical to
|
||||
* avoid this happening in the case of invalid UTF8 byte sequences.
|
||||
* If the assert is impractical to avoid, it could be demoted to a warning.
|
||||
* \{ */
|
||||
|
||||
int BLI_str_utf8_offset_to_index(const char *str, int offset)
|
||||
int BLI_str_utf8_offset_to_index(const char *str, const size_t str_len, const int offset_target)
|
||||
{
|
||||
int index = 0, pos = 0;
|
||||
while (pos != offset) {
|
||||
pos += BLI_str_utf8_size(str + pos);
|
||||
BLI_assert(offset_target >= 0);
|
||||
const size_t offset_target_as_size = (size_t)offset_target;
|
||||
size_t offset = 0;
|
||||
int index = 0;
|
||||
/* Note that `offset != offset_target_as_size` works for valid utf8 strings. */
|
||||
while ((offset < str_len) && (offset < offset_target_as_size)) {
|
||||
/* Use instead of #BLI_str_utf8_size_safe to match behavior when limiting the string length. */
|
||||
const uint code = BLI_str_utf8_as_unicode_step(str, str_len, &offset);
|
||||
UNUSED_VARS(code);
|
||||
index++;
|
||||
BLI_assert(offset <= offset_target_as_size); /* See DOXY section comment. */
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
int BLI_str_utf8_offset_from_index(const char *str, int index)
|
||||
int BLI_str_utf8_offset_from_index(const char *str, const size_t str_len, const int index_target)
|
||||
{
|
||||
int offset = 0, pos = 0;
|
||||
while (pos != index) {
|
||||
offset += BLI_str_utf8_size(str + offset);
|
||||
pos++;
|
||||
BLI_assert(index_target >= 0);
|
||||
size_t offset = 0;
|
||||
int index = 0;
|
||||
while ((offset < str_len) && (index < index_target)) {
|
||||
/* Use instead of #BLI_str_utf8_size_safe to match behavior when limiting the string length. */
|
||||
const uint code = BLI_str_utf8_as_unicode_step(str, str_len, &offset);
|
||||
UNUSED_VARS(code);
|
||||
index++;
|
||||
}
|
||||
return offset;
|
||||
}
|
||||
|
||||
int BLI_str_utf8_offset_to_column(const char *str, int offset)
|
||||
int BLI_str_utf8_offset_to_column(const char *str, const size_t str_len, const int offset_target)
|
||||
{
|
||||
int column = 0, pos = 0;
|
||||
while (pos < offset) {
|
||||
column += BLI_str_utf8_char_width_safe(str + pos);
|
||||
pos += BLI_str_utf8_size_safe(str + pos);
|
||||
BLI_assert(offset_target >= 0);
|
||||
const size_t offset_target_clamp = MIN2((size_t)offset_target, str_len);
|
||||
size_t offset = 0;
|
||||
int column = 0;
|
||||
while (offset < offset_target_clamp) {
|
||||
const uint code = BLI_str_utf8_as_unicode_step(str, str_len, &offset);
|
||||
column += BLI_wcwidth_safe(code);
|
||||
BLI_assert(offset <= (size_t)offset_target); /* See DOXY section comment. */
|
||||
}
|
||||
return column;
|
||||
}
|
||||
|
||||
int BLI_str_utf8_offset_from_column(const char *str, int column)
|
||||
int BLI_str_utf8_offset_from_column(const char *str, const size_t str_len, const int column_target)
|
||||
{
|
||||
int offset = 0, pos = 0;
|
||||
while (*(str + offset) && pos < column) {
|
||||
const int col = BLI_str_utf8_char_width_safe(str + offset);
|
||||
if (pos + col > column) {
|
||||
size_t offset = 0, offset_next = 0;
|
||||
int column = 0;
|
||||
while ((offset < str_len) && (column < column_target)) {
|
||||
const uint code = BLI_str_utf8_as_unicode_step(str, str_len, &offset_next);
|
||||
column += BLI_wcwidth_safe(code);
|
||||
if (column > column_target) {
|
||||
break;
|
||||
}
|
||||
offset += BLI_str_utf8_size_safe(str + offset);
|
||||
pos += col;
|
||||
offset = offset_next;
|
||||
}
|
||||
return offset;
|
||||
return (int)offset;
|
||||
}
|
||||
|
||||
/** \} */
|
||||
|
||||
@@ -69,8 +69,8 @@ static void textview_draw_sel(const char *str,
|
||||
const int lheight = tds->lheight;
|
||||
|
||||
if (sel[0] <= str_len_draw && sel[1] >= 0) {
|
||||
const int sta = BLI_str_utf8_offset_to_column(str, max_ii(sel[0], 0));
|
||||
const int end = BLI_str_utf8_offset_to_column(str, min_ii(sel[1], str_len_draw));
|
||||
const int sta = BLI_str_utf8_offset_to_column(str, str_len_draw, max_ii(sel[0], 0));
|
||||
const int end = BLI_str_utf8_offset_to_column(str, str_len_draw, min_ii(sel[1], str_len_draw));
|
||||
|
||||
GPU_blend(GPU_BLEND_ALPHA);
|
||||
|
||||
@@ -157,8 +157,8 @@ static bool textview_draw_string(TextViewDrawState *tds,
|
||||
}
|
||||
|
||||
/* Last part. */
|
||||
ofs += BLI_str_utf8_offset_from_column(str + ofs,
|
||||
int(floor(float(tds->mval[0]) / tds->cwidth)));
|
||||
ofs += BLI_str_utf8_offset_from_column(
|
||||
str + ofs, str_len - ofs, int(floor(float(tds->mval[0]) / tds->cwidth)));
|
||||
|
||||
CLAMP(ofs, 0, str_len);
|
||||
*tds->mval_pick_offset += str_len - ofs;
|
||||
|
||||
@@ -229,7 +229,7 @@ void wrap_offset(
|
||||
}
|
||||
|
||||
max = wrap_width(st, region);
|
||||
cursin = BLI_str_utf8_offset_to_column(linein->line, cursin);
|
||||
cursin = BLI_str_utf8_offset_to_column(linein->line, linein->len, cursin);
|
||||
|
||||
while (linep) {
|
||||
start = 0;
|
||||
@@ -311,7 +311,7 @@ void wrap_offset_in_line(
|
||||
end = max;
|
||||
chop = 1;
|
||||
*offc = 0;
|
||||
cursin = BLI_str_utf8_offset_to_column(linein->line, cursin);
|
||||
cursin = BLI_str_utf8_offset_to_column(linein->line, linein->len, cursin);
|
||||
|
||||
for (i = 0, j = 0; linein->line[j]; j += BLI_str_utf8_size_safe(linein->line + j)) {
|
||||
int columns = BLI_str_utf8_char_width_safe(linein->line + j); /* = 1 for tab */
|
||||
@@ -1334,7 +1334,7 @@ static void draw_brackets(const SpaceText *st, const TextDrawContext *tdc, ARegi
|
||||
|
||||
linep = startl;
|
||||
c = startc;
|
||||
fc = BLI_str_utf8_offset_to_index(linep->line, startc);
|
||||
fc = BLI_str_utf8_offset_to_index(linep->line, linep->len, startc);
|
||||
endl = nullptr;
|
||||
endc = -1;
|
||||
find = -b;
|
||||
|
||||
@@ -1963,7 +1963,7 @@ static void txt_wrap_move_bol(SpaceText *st, ARegion *region, const bool sel)
|
||||
|
||||
if (j >= oldc) {
|
||||
if (ch == '\0') {
|
||||
*charp = BLI_str_utf8_offset_from_column((*linep)->line, start);
|
||||
*charp = BLI_str_utf8_offset_from_column((*linep)->line, (*linep)->len, start);
|
||||
}
|
||||
loop = 0;
|
||||
break;
|
||||
@@ -1979,7 +1979,7 @@ static void txt_wrap_move_bol(SpaceText *st, ARegion *region, const bool sel)
|
||||
}
|
||||
else if (ELEM(ch, ' ', '-', '\0')) {
|
||||
if (j >= oldc) {
|
||||
*charp = BLI_str_utf8_offset_from_column((*linep)->line, start);
|
||||
*charp = BLI_str_utf8_offset_from_column((*linep)->line, (*linep)->len, start);
|
||||
loop = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -104,7 +104,8 @@ static void rna_Text_select_end_line_index_set(PointerRNA *ptr, int value)
|
||||
static int rna_Text_current_character_get(PointerRNA *ptr)
|
||||
{
|
||||
Text *text = static_cast<Text *>(ptr->data);
|
||||
return BLI_str_utf8_offset_to_index(text->curl->line, text->curc);
|
||||
const TextLine *line = text->curl;
|
||||
return BLI_str_utf8_offset_to_index(line->line, line->len, text->curc);
|
||||
}
|
||||
|
||||
static void rna_Text_current_character_set(PointerRNA *ptr, int index)
|
||||
@@ -113,13 +114,14 @@ static void rna_Text_current_character_set(PointerRNA *ptr, int index)
|
||||
TextLine *line = text->curl;
|
||||
const int len_utf8 = BLI_strlen_utf8(line->line);
|
||||
CLAMP_MAX(index, len_utf8);
|
||||
text->curc = BLI_str_utf8_offset_from_index(line->line, index);
|
||||
text->curc = BLI_str_utf8_offset_from_index(line->line, line->len, index);
|
||||
}
|
||||
|
||||
static int rna_Text_select_end_character_get(PointerRNA *ptr)
|
||||
{
|
||||
Text *text = static_cast<Text *>(ptr->data);
|
||||
return BLI_str_utf8_offset_to_index(text->sell->line, text->selc);
|
||||
TextLine *line = text->sell;
|
||||
return BLI_str_utf8_offset_to_index(line->line, line->len, text->selc);
|
||||
}
|
||||
|
||||
static void rna_Text_select_end_character_set(PointerRNA *ptr, int index)
|
||||
@@ -128,7 +130,7 @@ static void rna_Text_select_end_character_set(PointerRNA *ptr, int index)
|
||||
TextLine *line = text->sell;
|
||||
const int len_utf8 = BLI_strlen_utf8(line->line);
|
||||
CLAMP_MAX(index, len_utf8);
|
||||
text->selc = BLI_str_utf8_offset_from_index(line->line, index);
|
||||
text->selc = BLI_str_utf8_offset_from_index(line->line, line->len, index);
|
||||
}
|
||||
|
||||
static void rna_TextLine_body_get(PointerRNA *ptr, char *value)
|
||||
|
||||
@@ -21,8 +21,10 @@ static void node_build_multi_function(NodeMultiFunctionBuilder &builder)
|
||||
static auto slice_fn = mf::build::SI3_SO<std::string, int, int, std::string>(
|
||||
"Slice", [](const std::string &str, int a, int b) {
|
||||
const int len = BLI_strlen_utf8(str.c_str());
|
||||
const int start = BLI_str_utf8_offset_from_index(str.c_str(), std::clamp(a, 0, len));
|
||||
const int end = BLI_str_utf8_offset_from_index(str.c_str(), std::clamp(a + b, 0, len));
|
||||
const int start = BLI_str_utf8_offset_from_index(
|
||||
str.c_str(), str.size(), std::clamp(a, 0, len));
|
||||
const int end = BLI_str_utf8_offset_from_index(
|
||||
str.c_str(), str.size(), std::clamp(a + b, 0, len));
|
||||
return str.substr(start, std::max<int>(end - start, 0));
|
||||
});
|
||||
builder.set_matching_fn(&slice_fn);
|
||||
|
||||
@@ -233,7 +233,8 @@ static std::optional<TextLayout> get_text_layout(GeoNodeExecParams ¶ms)
|
||||
layout.positions.append(float2(ct.xof, ct.yof) * layout.final_font_size);
|
||||
|
||||
if ((info[i].flag & CU_CHINFO_OVERFLOW) && (cu.overflow == CU_OVERFLOW_TRUNCATE)) {
|
||||
const int offset = BLI_str_utf8_offset_from_index(layout.text.c_str(), i + 1);
|
||||
const int offset = BLI_str_utf8_offset_from_index(
|
||||
layout.text.c_str(), layout.text.size(), i + 1);
|
||||
layout.truncated_text = layout.text.substr(offset);
|
||||
layout.text = layout.text.substr(0, offset);
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user