Cleanup: add suffix to clarify unicode functions that return an error

There were enough cases of callers ignoring a potential error value, for example using the column width to calculate pixel sizes, or the size in bytes to calculate buffer offsets. Since text fields & labels can include characters that return an error from BLI_str_utf8_as_unicode, add the suffix to make this explicit.
2023-09-18 13:36:17 +10:00
parent a47f6ffa4d
commit ed552e9e4f
19 changed files with 51 additions and 48 deletions
--- a/source/blender/blenfont/intern/blf_font.cc
+++ b/source/blender/blenfont/intern/blf_font.cc
@@ -985,7 +985,7 @@ size_t blf_str_offset_from_cursor_position(FontBLF *font,
    /* We are to the right of the string, so return position of null terminator. */
    data.r_offset = BLI_strnlen(str, str_len);
  }
-  else if (BLI_str_utf8_char_width(&str[data.r_offset]) == 0) {
+  else if (BLI_str_utf8_char_width_or_error(&str[data.r_offset]) == 0) {
    /* This is a combining character, so move to previous visible valid char. */
    int offset = int(data.r_offset);
    BLI_str_cursor_step_prev_utf8(str, str_len, &offset);
--- a/source/blender/blenfont/intern/blf_glyph.cc
+++ b/source/blender/blenfont/intern/blf_glyph.cc
@@ -1052,7 +1052,7 @@ static FT_GlyphSlot blf_glyph_render(FontBLF *settings_font,
  }

  if ((settings_font->flags & BLF_MONOSPACED) && (settings_font != glyph_font)) {
-    const int col = BLI_wcwidth(char32_t(charcode));
+    const int col = BLI_wcwidth_or_error(char32_t(charcode));
    if (col > 0) {
      blf_glyph_transform_monospace(glyph, col * fixed_width);
    }
--- a/source/blender/blenlib/BLI_string_utf8.h
+++ b/source/blender/blenlib/BLI_string_utf8.h
@@ -37,7 +37,7 @@ int BLI_str_utf8_invalid_strip(char *str, size_t length) ATTR_NONNULL(1);
 * \return The size (in bytes) of a single UTF-8 char.
 * \warning Can return -1 on bad chars.
 */
-int BLI_str_utf8_size(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
+int BLI_str_utf8_size_or_error(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
 /**
 * Use when we want to skip errors.
 */
@@ -53,7 +53,8 @@ int BLI_str_utf8_size_safe(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1
 *
 * Return value: the resulting character
 */
-unsigned int BLI_str_utf8_as_unicode(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
+unsigned int BLI_str_utf8_as_unicode_or_error(const char *p) ATTR_WARN_UNUSED_RESULT
+    ATTR_NONNULL(1);
 /**
 * UTF8 decoding that steps over the index (unless an error is encountered).
 *
@@ -173,9 +174,9 @@ size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w,
 /**
 * Count columns that character/string occupies (based on `wcwidth.co`).
 */
-int BLI_wcwidth(char32_t ucs) ATTR_WARN_UNUSED_RESULT;
+int BLI_wcwidth_or_error(char32_t ucs) ATTR_WARN_UNUSED_RESULT;
 int BLI_wcwidth_safe(char32_t ucs) ATTR_WARN_UNUSED_RESULT;
-int BLI_wcswidth(const char32_t *pwcs, size_t n) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
+int BLI_wcswidth_or_error(const char32_t *pwcs, size_t n) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);

 /**
 * Return the uppercase of a 32-bit character or the character when no case change is needed.
@@ -193,7 +194,7 @@ char32_t BLI_str_utf32_char_to_lower(char32_t wc);
 /**
 * \warning can return -1 on bad chars.
 */
-int BLI_str_utf8_char_width(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
+int BLI_str_utf8_char_width_or_error(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);
 int BLI_str_utf8_char_width_safe(const char *p) ATTR_WARN_UNUSED_RESULT ATTR_NONNULL(1);

 size_t BLI_str_partition_utf8(const char *str,
--- a/source/blender/blenlib/intern/string_cursor_utf8.c
+++ b/source/blender/blenlib/intern/string_cursor_utf8.c
@@ -121,7 +121,8 @@ bool BLI_str_cursor_step_next_utf8(const char *str, size_t str_maxlen, int *pos)
  const char *str_next = str_pos;
  do {
    str_next = BLI_str_find_next_char_utf8(str_next, str_end);
-  } while (str_next < str_end && str_next[0] != 0 && BLI_str_utf8_char_width(str_next) == 0);
+  } while (str_next < str_end && str_next[0] != 0 &&
+           BLI_str_utf8_char_width_or_error(str_next) == 0);
  (*pos) += (str_next - str_pos);
  if ((*pos) > (int)str_maxlen) {
    (*pos) = (int)str_maxlen;
@@ -139,7 +140,7 @@ bool BLI_str_cursor_step_prev_utf8(const char *str, size_t str_maxlen, int *pos)
    const char *str_prev = str_pos;
    do {
      str_prev = BLI_str_find_prev_char_utf8(str_prev, str);
-    } while (str_prev > str && BLI_str_utf8_char_width(str_prev) == 0);
+    } while (str_prev > str && BLI_str_utf8_char_width_or_error(str_prev) == 0);
    (*pos) -= (str_pos - str_prev);
    return true;
  }
@@ -234,7 +235,7 @@ bool BLI_str_cursor_step_next_utf32(const char32_t *str, size_t str_maxlen, int
  }
  do {
    (*pos)++;
-  } while (*pos < (int)str_maxlen && str[*pos] != 0 && BLI_wcwidth(str[*pos]) == 0);
+  } while (*pos < (int)str_maxlen && str[*pos] != 0 && BLI_wcwidth_or_error(str[*pos]) == 0);

  return true;
 }
@@ -248,7 +249,7 @@ bool BLI_str_cursor_step_prev_utf32(const char32_t *str, size_t UNUSED(str_maxle
  }
  do {
    (*pos)--;
-  } while (*pos > 0 && BLI_wcwidth(str[*pos]) == 0);
+  } while (*pos > 0 && BLI_wcwidth_or_error(str[*pos]) == 0);

  return true;
 }
--- a/source/blender/blenlib/intern/string_search.cc
+++ b/source/blender/blenlib/intern/string_search.cc
@@ -111,9 +111,9 @@ int get_fuzzy_match_errors(StringRef query, StringRef full)
    return -1;
  }

-  const uint32_t query_first_unicode = BLI_str_utf8_as_unicode(query.data());
-  const uint32_t query_second_unicode = BLI_str_utf8_as_unicode(query.data() +
-                                                                BLI_str_utf8_size(query.data()));
+  const uint32_t query_first_unicode = BLI_str_utf8_as_unicode_or_error(query.data());
+  const uint32_t query_second_unicode = BLI_str_utf8_as_unicode_or_error(
+      query.data() + BLI_str_utf8_size_or_error(query.data()));

  const char *full_begin = full.begin();
  const char *full_end = full.end();
@@ -125,12 +125,12 @@ int get_fuzzy_match_errors(StringRef query, StringRef full)
  const int max_acceptable_distance = max_errors + extra_chars;

  for (int i = 0; i < window_size; i++) {
-    window_end += BLI_str_utf8_size(window_end);
+    window_end += BLI_str_utf8_size_or_error(window_end);
  }

  while (true) {
    StringRef window{window_begin, window_end};
-    const uint32_t window_begin_unicode = BLI_str_utf8_as_unicode(window_begin);
+    const uint32_t window_begin_unicode = BLI_str_utf8_as_unicode_or_error(window_begin);
    int distance = 0;
    /* Expect that the first or second character of the query is correct. This helps to avoid
     * computing the more expensive distance function. */
@@ -148,8 +148,8 @@ int get_fuzzy_match_errors(StringRef query, StringRef full)
     * distance can't possibly become as short as required. */
    const int window_offset = std::max(1, distance / 2);
    for (int i = 0; i < window_offset && window_end < full_end; i++) {
-      window_begin += BLI_str_utf8_size(window_begin);
-      window_end += BLI_str_utf8_size(window_end);
+      window_begin += BLI_str_utf8_size_or_error(window_begin);
+      window_end += BLI_str_utf8_size_or_error(window_end);
    }
  }
 }
@@ -352,9 +352,9 @@ void extract_normalized_words(StringRef str,
  const uint32_t unicode_slash = uint32_t('/');
  const uint32_t unicode_right_triangle = UI_MENU_ARROW_SEP_UNICODE;

-  BLI_assert(unicode_space == BLI_str_utf8_as_unicode(" "));
-  BLI_assert(unicode_slash == BLI_str_utf8_as_unicode("/"));
-  BLI_assert(unicode_right_triangle == BLI_str_utf8_as_unicode(UI_MENU_ARROW_SEP));
+  BLI_assert(unicode_space == BLI_str_utf8_as_unicode_or_error(" "));
+  BLI_assert(unicode_slash == BLI_str_utf8_as_unicode_or_error("/"));
+  BLI_assert(unicode_right_triangle == BLI_str_utf8_as_unicode_or_error(UI_MENU_ARROW_SEP));

  auto is_separator = [&](uint32_t unicode) {
    return ELEM(unicode, unicode_space, unicode_slash, unicode_right_triangle);
--- a/source/blender/blenlib/intern/string_utf8.c
+++ b/source/blender/blenlib/intern/string_utf8.c
@@ -451,7 +451,7 @@ size_t BLI_strncpy_wchar_from_utf8(wchar_t *__restrict dst_w,
 /* end wchar_t / utf8 functions */
 /* -------------------------------------------------------------------- */

-int BLI_wcwidth(char32_t ucs)
+int BLI_wcwidth_or_error(char32_t ucs)
 {
  /* Treat private use areas (icon fonts), symbols, and emoticons as double-width. */
  if (ucs >= 0xf0000 || (ucs >= 0xe000 && ucs < 0xf8ff) || (ucs >= 0x1f300 && ucs < 0x1fbff)) {
@@ -462,31 +462,31 @@ int BLI_wcwidth(char32_t ucs)

 int BLI_wcwidth_safe(char32_t ucs)
 {
-  const int columns = BLI_wcwidth(ucs);
+  const int columns = BLI_wcwidth_or_error(ucs);
  if (columns >= 0) {
    return columns;
  }
  return 1;
 }

-int BLI_wcswidth(const char32_t *pwcs, size_t n)
+int BLI_wcswidth_or_error(const char32_t *pwcs, size_t n)
 {
  return mk_wcswidth(pwcs, n);
 }

-int BLI_str_utf8_char_width(const char *p)
+int BLI_str_utf8_char_width_or_error(const char *p)
 {
-  uint unicode = BLI_str_utf8_as_unicode(p);
+  uint unicode = BLI_str_utf8_as_unicode_or_error(p);
  if (unicode == BLI_UTF8_ERR) {
    return -1;
  }

-  return BLI_wcwidth((char32_t)unicode);
+  return BLI_wcwidth_or_error((char32_t)unicode);
 }

 int BLI_str_utf8_char_width_safe(const char *p)
 {
-  uint unicode = BLI_str_utf8_as_unicode(p);
+  uint unicode = BLI_str_utf8_as_unicode_or_error(p);
  if (unicode == BLI_UTF8_ERR) {
    return 1;
  }
@@ -723,7 +723,7 @@ char32_t BLI_str_utf32_char_to_lower(const char32_t wc)

 /** \} */ /* -------------------------------------------------------------------- */

-int BLI_str_utf8_size(const char *p)
+int BLI_str_utf8_size_or_error(const char *p)
 {
  return utf8_char_compute_skip_or_error(*p);
 }
@@ -733,7 +733,7 @@ int BLI_str_utf8_size_safe(const char *p)
  return utf8_char_compute_skip(*p);
 }

-uint BLI_str_utf8_as_unicode(const char *p)
+uint BLI_str_utf8_as_unicode_or_error(const char *p)
 {
  /* Originally `g_utf8_get_char` in GLIB. */

@@ -982,7 +982,7 @@ size_t BLI_str_partition_ex_utf8(const char *str,
  }

  /* Note that here, we assume end points to a valid utf8 char! */
-  BLI_assert((end >= str) && (BLI_str_utf8_as_unicode(end) != BLI_UTF8_ERR));
+  BLI_assert((end >= str) && (BLI_str_utf8_as_unicode_or_error(end) != BLI_UTF8_ERR));

  char *suf = (char *)(str + str_len);
  size_t index = 0;
--- a/source/blender/blenlib/tests/BLI_string_test.cc
+++ b/source/blender/blenlib/tests/BLI_string_test.cc
@@ -71,7 +71,7 @@ TEST(string, StrCopyUTF8_TruncateEncoding)
 #define STRNCPY_UTF8_TRUNCATE(byte_size, ...) \
  { \
    const char src[] = {__VA_ARGS__, 0}; \
-    EXPECT_EQ(BLI_str_utf8_size(src), byte_size); \
+    EXPECT_EQ(BLI_str_utf8_size_or_error(src), byte_size); \
    char dst[sizeof(src)]; \
    memset(dst, 0xff, sizeof(dst)); \
    STRNCPY_UTF8(dst, src); \
@@ -98,7 +98,7 @@ TEST(string, StrCopyUTF8_TerminateEncodingEarly)
 #define STRNCPY_UTF8_TERMINATE_EARLY(byte_size, ...) \
  { \
    char src[] = {__VA_ARGS__, 0}; \
-    EXPECT_EQ(BLI_str_utf8_size(src), byte_size); \
+    EXPECT_EQ(BLI_str_utf8_size_or_error(src), byte_size); \
    char dst[sizeof(src)]; \
    memset(dst, 0xff, sizeof(dst)); \
    STRNCPY_UTF8(dst, src); \
--- a/source/blender/editors/curve/editfont.cc
+++ b/source/blender/editors/curve/editfont.cc
@@ -1753,13 +1753,13 @@ static int insert_text_invoke(bContext *C, wmOperator *op, const wmEvent *event)
      if (accentcode) {
        if (ef->pos > 0) {
          inserted_text[0] = findaccent(ef->textbuf[ef->pos - 1],
-                                        BLI_str_utf8_as_unicode(event->utf8_buf));
+                                        BLI_str_utf8_as_unicode_or_error(event->utf8_buf));
          ef->textbuf[ef->pos - 1] = inserted_text[0];
        }
        accentcode = false;
      }
      else if (event->utf8_buf[0]) {
-        inserted_text[0] = BLI_str_utf8_as_unicode(event->utf8_buf);
+        inserted_text[0] = BLI_str_utf8_as_unicode_or_error(event->utf8_buf);
        insert_into_textbuf(obedit, inserted_text[0]);
        accentcode = false;
      }
--- a/source/blender/editors/interface/interface_handlers.cc
+++ b/source/blender/editors/interface/interface_handlers.cc
@@ -3983,7 +3983,7 @@ static void ui_do_but_textedit(
      }

      if (utf8_buf[0]) {
-        const int utf8_buf_len = BLI_str_utf8_size(utf8_buf);
+        const int utf8_buf_len = BLI_str_utf8_size_or_error(utf8_buf);
        BLI_assert(utf8_buf_len != -1);
        changed = ui_textedit_insert_buf(but, data, utf8_buf, utf8_buf_len);
      }
--- a/source/blender/editors/interface/interface_widgets.cc
+++ b/source/blender/editors/interface/interface_widgets.cc
@@ -1745,7 +1745,8 @@ static void ui_text_clip_cursor(const uiFontStyle *fstyle, uiBut *but, const rct
        if (width < 20 && but->ofs > 0) {
          ui_text_clip_give_prev_off(but, but->editstr);
        }
-        bytes = BLI_str_utf8_size(BLI_str_find_prev_char_utf8(but->editstr + len, but->editstr));
+        bytes = BLI_str_utf8_size_or_error(
+            BLI_str_find_prev_char_utf8(but->editstr + len, but->editstr));
        if (bytes == -1) {
          bytes = 1;
        }
--- a/source/blender/editors/space_text/text_autocomplete.cc
+++ b/source/blender/editors/space_text/text_autocomplete.cc
@@ -177,8 +177,8 @@ static GHash *text_autocomplete_build(Text *text)
        if ((i_start != i_end) &&
            /* Check we're at the beginning of a line or that the previous char is not an
             * identifier this prevents digits from being added. */
-            ((i_start < 1) ||
-             !text_check_identifier_unicode(BLI_str_utf8_as_unicode(&linep->line[i_start - 1]))))
+            ((i_start < 1) || !text_check_identifier_unicode(
+                                  BLI_str_utf8_as_unicode_or_error(&linep->line[i_start - 1]))))
        {
          char *str_sub = &linep->line[i_start];
          const int choice_len = i_end - i_start;
--- a/source/blender/editors/space_text/text_ops.cc
+++ b/source/blender/editors/space_text/text_ops.cc
@@ -3629,7 +3629,7 @@ static int text_insert_invoke(bContext *C, wmOperator *op, const wmEvent *event)
    RNA_string_set(op->ptr, "text", str);

    if (U.text_flag & USER_TEXT_EDIT_AUTO_CLOSE) {
-      auto_close_char_input = BLI_str_utf8_as_unicode(str);
+      auto_close_char_input = BLI_str_utf8_as_unicode_or_error(str);
      if (isascii(auto_close_char_input)) {
        auto_close_char_match = text_closing_character_pair_get(auto_close_char_input);
        if (auto_close_char_match != 0) {
--- a/source/blender/editors/util/numinput.cc
+++ b/source/blender/editors/util/numinput.cc
@@ -562,7 +562,7 @@ bool handleNumInput(bContext *C, NumInput *n, const wmEvent *event)
      }
    }

-    if (!editstr_insert_at_cursor(n, utf8_buf, BLI_str_utf8_size(utf8_buf))) {
+    if (!editstr_insert_at_cursor(n, utf8_buf, BLI_str_utf8_size_or_error(utf8_buf))) {
      return false;
    }

--- a/source/blender/makesrna/intern/rna_wm.cc
+++ b/source/blender/makesrna/intern/rna_wm.cc
@@ -674,7 +674,7 @@ static int rna_Event_unicode_length(PointerRNA *ptr)
  const wmEvent *event = static_cast<wmEvent *>(ptr->data);
  if (event->utf8_buf[0]) {
    /* invalid value is checked on assignment so we don't need to account for this */
-    return BLI_str_utf8_size(event->utf8_buf);
+    return BLI_str_utf8_size_or_error(event->utf8_buf);
  }
  else {
    return 0;
--- a/source/blender/makesrna/intern/rna_wm_api.cc
+++ b/source/blender/makesrna/intern/rna_wm_api.cc
@@ -659,7 +659,7 @@ static wmEvent *rna_Window_event_add_simulate(wmWindow *win,
  /* TODO: validate NDOF. */

  if (unicode != nullptr) {
-    int len = BLI_str_utf8_size(unicode);
+    int len = BLI_str_utf8_size_or_error(unicode);
    if (len == -1 || unicode[len] != '\0') {
      BKE_report(reports, RPT_ERROR, "Only a single character supported");
      return nullptr;
--- a/source/blender/windowmanager/WM_types.hh
+++ b/source/blender/windowmanager/WM_types.hh
@@ -708,7 +708,7 @@ struct wmEvent {
  int mval[2];
  /**
   * A single UTF8 encoded character.
-   * #BLI_str_utf8_size() must _always_ return a valid value,
+   * #BLI_str_utf8_size_or_error() must _always_ return a valid value,
   * check when assigning so we don't need to check on every access after.
   */
  char utf8_buf[6];
--- a/source/blender/windowmanager/intern/wm_event_query.cc
+++ b/source/blender/windowmanager/intern/wm_event_query.cc
@@ -129,7 +129,7 @@ void WM_event_print(const wmEvent *event)
        flag_id,
        event->xy[0],
        event->xy[1],
-        BLI_str_utf8_size(event->utf8_buf),
+        BLI_str_utf8_size_or_error(event->utf8_buf),
        event->utf8_buf,
        (const void *)event);

@@ -445,7 +445,7 @@ void WM_event_drag_start_xy(const wmEvent *event, int r_xy[2])

 char WM_event_utf8_to_ascii(const wmEvent *event)
 {
-  if (BLI_str_utf8_size(event->utf8_buf) == 1) {
+  if (BLI_str_utf8_size_or_error(event->utf8_buf) == 1) {
    return event->utf8_buf[0];
  }
  return '\0';
--- a/source/blender/windowmanager/intern/wm_event_system.cc
+++ b/source/blender/windowmanager/intern/wm_event_system.cc
@@ -5694,7 +5694,7 @@ void wm_event_add_ghostevent(wmWindowManager *wm, wmWindow *win, const int type,
                     utf8_buf_len);
        }

-        if (BLI_str_utf8_size(event.utf8_buf) == -1) {
+        if (BLI_str_utf8_size_or_error(event.utf8_buf) == -1) {
          CLOG_ERROR(WM_LOG_EVENTS,
                     "ghost detected an invalid unicode character '%d'",
                     int(uchar(event.utf8_buf[0])));
--- a/source/blender/windowmanager/intern/wm_keymap.cc
+++ b/source/blender/windowmanager/intern/wm_keymap.cc
@@ -1059,7 +1059,7 @@ static const char *key_event_glyph_or_text(const int font_id,
                                           const char *single_glyph)
 {
  BLI_assert(single_glyph == nullptr || (BLI_strlen_utf8(single_glyph) == 1));
-  return (single_glyph && BLF_has_glyph(font_id, BLI_str_utf8_as_unicode(single_glyph))) ?
+  return (single_glyph && BLF_has_glyph(font_id, BLI_str_utf8_as_unicode_or_error(single_glyph))) ?
             single_glyph :
             text;
 }