427 lines
13 KiB
C++
427 lines
13 KiB
C++
/* SPDX-FileCopyrightText: 2011 Blender Authors
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
/** \file
|
|
* \ingroup bli
|
|
*/
|
|
|
|
#include <algorithm>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
|
|
#include "BLI_string_utf8.h"
|
|
#include "BLI_utildefines.h"
|
|
|
|
#include "BLI_string_cursor_utf8.h" /* own include */
|
|
|
|
#include "BLI_strict_flags.h" /* IWYU pragma: keep. Keep last. */
|
|
|
|
/**
|
|
* The category of character as returned by #cursor_delim_type_unicode.
|
|
*
|
|
* \note Don't compare with any values besides #STRCUR_DELIM_NONE as cursor motion
|
|
* should only delimit on changes, not treat some groups differently.
|
|
*
|
|
* For range calculation the order prioritizes expansion direction,
|
|
* when the cursor is between two different categories, "hug" the smaller values.
|
|
* Where white-space gets lowest priority. See #BLI_str_cursor_step_bounds_utf8.
|
|
* This is done so expanding the range at a word boundary always chooses the word instead
|
|
* of the white-space before or after it.
|
|
*/
|
|
enum eStrCursorDelimType {
|
|
STRCUR_DELIM_NONE,
|
|
STRCUR_DELIM_ALPHANUMERIC,
|
|
STRCUR_DELIM_PUNCT,
|
|
STRCUR_DELIM_BRACE,
|
|
STRCUR_DELIM_OPERATOR,
|
|
STRCUR_DELIM_QUOTE,
|
|
STRCUR_DELIM_OTHER,
|
|
STRCUR_DELIM_WHITESPACE,
|
|
};
|
|
|
|
static eStrCursorDelimType cursor_delim_type_unicode(const uint uch)
|
|
{
|
|
switch (uch) {
|
|
case ',':
|
|
case '.':
|
|
case 0x2026: /* Horizontal ellipsis. */
|
|
case 0x3002: /* CJK full width full stop. */
|
|
case 0xFF0C: /* CJK full width comma. */
|
|
case 0xFF61: /* CJK half width full stop. */
|
|
return STRCUR_DELIM_PUNCT;
|
|
|
|
case '{':
|
|
case '}':
|
|
case '[':
|
|
case ']':
|
|
case '(':
|
|
case ')':
|
|
case 0x3010: /* CJK full width left black lenticular bracket. */
|
|
case 0x3011: /* CJK full width right black lenticular bracket. */
|
|
case 0xFF08: /* CJK full width left parenthesis. */
|
|
case 0xFF09: /* CJK full width right parenthesis. */
|
|
return STRCUR_DELIM_BRACE;
|
|
|
|
case '+':
|
|
case '-':
|
|
case '=':
|
|
case '~':
|
|
case '%':
|
|
case '/':
|
|
case '<':
|
|
case '>':
|
|
case '^':
|
|
case '*':
|
|
case '&':
|
|
case '|':
|
|
case 0x2014: /* Em dash. */
|
|
case 0x300A: /* CJK full width left double angle bracket. */
|
|
case 0x300B: /* CJK full width right double angle bracket. */
|
|
case 0xFF0F: /* CJK full width solidus (forward slash). */
|
|
case 0xFF5E: /* CJK full width tilde. */
|
|
return STRCUR_DELIM_OPERATOR;
|
|
|
|
case '\'':
|
|
case '\"':
|
|
case '`':
|
|
case 0xB4: /* Acute accent. */
|
|
case 0x2018: /* Left single quotation mark. */
|
|
case 0x2019: /* Right single quotation mark. */
|
|
case 0x201C: /* Left double quotation mark. */
|
|
case 0x201D: /* Right double quotation mark. */
|
|
return STRCUR_DELIM_QUOTE;
|
|
|
|
case ' ':
|
|
case '\t':
|
|
case '\n':
|
|
return STRCUR_DELIM_WHITESPACE;
|
|
|
|
case '\\':
|
|
case '@':
|
|
case '#':
|
|
case '$':
|
|
case ':':
|
|
case ';':
|
|
case '?':
|
|
case '!':
|
|
case 0xA3: /* Pound sign. */
|
|
case 0x80: /* Euro sign. */
|
|
case 0x3001: /* CJK ideographic comma. */
|
|
case 0xFF01: /* CJK full width exclamation mark. */
|
|
case 0xFF64: /* CJK half width ideographic comma. */
|
|
case 0xFF65: /* Katakana half width middle dot. */
|
|
case 0xFF1A: /* CJK full width colon. */
|
|
case 0xFF1B: /* CJK full width semicolon. */
|
|
case 0xFF1F: /* CJK full width question mark. */
|
|
/* case '_': */ /* special case, for python */
|
|
return STRCUR_DELIM_OTHER;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
return STRCUR_DELIM_ALPHANUMERIC; /* Not quite true, but ok for now */
|
|
}
|
|
|
|
static eStrCursorDelimType cursor_delim_type_utf8(const char *ch_utf8,
|
|
const int ch_utf8_len,
|
|
const int pos)
|
|
{
|
|
BLI_assert(ch_utf8_len >= 0);
|
|
/* for full unicode support we really need to have large lookup tables to figure
|
|
* out what's what in every possible char set - and python, glib both have these. */
|
|
size_t index = size_t(pos);
|
|
uint uch = BLI_str_utf8_as_unicode_step_or_error(ch_utf8, size_t(ch_utf8_len), &index);
|
|
return cursor_delim_type_unicode(uch);
|
|
}
|
|
|
|
bool BLI_str_cursor_step_next_utf8(const char *str, const int str_maxlen, int *pos)
|
|
{
|
|
/* NOTE: Keep in sync with #BLI_str_cursor_step_next_utf32. */
|
|
BLI_assert(str_maxlen >= 0);
|
|
BLI_assert(*pos >= 0);
|
|
|
|
if (*pos >= str_maxlen) {
|
|
return false;
|
|
}
|
|
const char *str_end = str + (str_maxlen + 1);
|
|
const char *str_pos = str + *pos;
|
|
const char *str_next = str_pos;
|
|
do {
|
|
str_next = BLI_str_find_next_char_utf8(str_next, str_end);
|
|
} while ((str_next < str_end) && (str_next[0] != 0) &&
|
|
(BLI_str_utf8_char_width_or_error(str_next) == 0));
|
|
*pos += int(str_next - str_pos);
|
|
*pos = std::min(*pos, str_maxlen);
|
|
|
|
return true;
|
|
}
|
|
|
|
bool BLI_str_cursor_step_prev_utf8(const char *str, const int str_maxlen, int *pos)
|
|
{
|
|
/* NOTE: Keep in sync with #BLI_str_cursor_step_prev_utf32. */
|
|
BLI_assert(str_maxlen >= 0);
|
|
BLI_assert(*pos >= 0);
|
|
|
|
if ((*pos > 0) && (*pos <= str_maxlen)) {
|
|
const char *str_pos = str + *pos;
|
|
const char *str_prev = str_pos;
|
|
do {
|
|
str_prev = BLI_str_find_prev_char_utf8(str_prev, str);
|
|
} while ((str_prev > str) && (BLI_str_utf8_char_width_or_error(str_prev) == 0));
|
|
*pos -= int(str_pos - str_prev);
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void BLI_str_cursor_step_utf8(const char *str,
|
|
const int str_maxlen,
|
|
int *pos,
|
|
eStrCursorJumpDirection direction,
|
|
eStrCursorJumpType jump,
|
|
bool use_init_step)
|
|
{
|
|
BLI_assert(str_maxlen >= 0);
|
|
const int pos_orig = *pos;
|
|
|
|
if (direction == STRCUR_DIR_NEXT) {
|
|
if (use_init_step) {
|
|
BLI_str_cursor_step_next_utf8(str, str_maxlen, pos);
|
|
}
|
|
else {
|
|
BLI_assert(jump == STRCUR_JUMP_DELIM);
|
|
}
|
|
|
|
if (jump != STRCUR_JUMP_NONE) {
|
|
const eStrCursorDelimType delim_type = (*pos < str_maxlen) ?
|
|
cursor_delim_type_utf8(str, str_maxlen, *pos) :
|
|
STRCUR_DELIM_NONE;
|
|
/* jump between special characters (/,\,_,-, etc.),
|
|
* look at function cursor_delim_type() for complete
|
|
* list of special character, ctr -> */
|
|
while (*pos < str_maxlen) {
|
|
if (BLI_str_cursor_step_next_utf8(str, str_maxlen, pos)) {
|
|
if (*pos == str_maxlen) {
|
|
break;
|
|
}
|
|
if ((jump != STRCUR_JUMP_ALL) &&
|
|
(delim_type != cursor_delim_type_utf8(str, str_maxlen, *pos)))
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
else {
|
|
break; /* unlikely but just in case */
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (direction == STRCUR_DIR_PREV) {
|
|
if (use_init_step) {
|
|
BLI_str_cursor_step_prev_utf8(str, str_maxlen, pos);
|
|
}
|
|
else {
|
|
BLI_assert(jump == STRCUR_JUMP_DELIM);
|
|
}
|
|
|
|
if (jump != STRCUR_JUMP_NONE) {
|
|
const eStrCursorDelimType delim_type = (*pos > 0) ? cursor_delim_type_utf8(
|
|
str, str_maxlen, *pos - 1) :
|
|
STRCUR_DELIM_NONE;
|
|
/* jump between special characters (/,\,_,-, etc.),
|
|
* look at function cursor_delim_type() for complete
|
|
* list of special character, ctr -> */
|
|
while (*pos > 0) {
|
|
const int pos_prev = *pos;
|
|
if (BLI_str_cursor_step_prev_utf8(str, str_maxlen, pos)) {
|
|
if ((jump != STRCUR_JUMP_ALL) &&
|
|
(delim_type != cursor_delim_type_utf8(str, str_maxlen, *pos)))
|
|
{
|
|
/* left only: compensate for index/change in direction */
|
|
if ((pos_orig - *pos) >= 1) {
|
|
*pos = pos_prev;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
BLI_assert_unreachable();
|
|
}
|
|
}
|
|
|
|
bool BLI_str_cursor_step_next_utf32(const char32_t *str, const int str_maxlen, int *pos)
|
|
{
|
|
/* NOTE: Keep in sync with #BLI_str_cursor_step_next_utf8. */
|
|
BLI_assert(str_maxlen >= 0);
|
|
BLI_assert(*pos >= 0);
|
|
|
|
if (*pos >= str_maxlen) {
|
|
return false;
|
|
}
|
|
do {
|
|
(*pos)++;
|
|
} while ((*pos < str_maxlen) && (str[*pos] != 0) && (BLI_wcwidth_or_error(str[*pos]) == 0));
|
|
|
|
return true;
|
|
}
|
|
|
|
bool BLI_str_cursor_step_prev_utf32(const char32_t *str, const int str_maxlen, int *pos)
|
|
{
|
|
/* NOTE: Keep in sync with #BLI_str_cursor_step_prev_utf8. */
|
|
BLI_assert(str_maxlen >= 0);
|
|
BLI_assert(*pos >= 0);
|
|
UNUSED_VARS_NDEBUG(str_maxlen);
|
|
|
|
if (*pos <= 0) {
|
|
return false;
|
|
}
|
|
do {
|
|
(*pos)--;
|
|
} while ((*pos > 0) && (BLI_wcwidth_or_error(str[*pos]) == 0));
|
|
|
|
return true;
|
|
}
|
|
|
|
void BLI_str_cursor_step_utf32(const char32_t *str,
|
|
const int str_maxlen,
|
|
int *pos,
|
|
eStrCursorJumpDirection direction,
|
|
eStrCursorJumpType jump,
|
|
bool use_init_step)
|
|
{
|
|
BLI_assert(str_maxlen >= 0);
|
|
const int pos_orig = *pos;
|
|
|
|
if (direction == STRCUR_DIR_NEXT) {
|
|
if (use_init_step) {
|
|
BLI_str_cursor_step_next_utf32(str, str_maxlen, pos);
|
|
}
|
|
else {
|
|
BLI_assert(jump == STRCUR_JUMP_DELIM);
|
|
}
|
|
|
|
if (jump != STRCUR_JUMP_NONE) {
|
|
const eStrCursorDelimType delim_type = (*pos < str_maxlen) ?
|
|
cursor_delim_type_unicode(uint(str[*pos])) :
|
|
STRCUR_DELIM_NONE;
|
|
/* jump between special characters (/,\,_,-, etc.),
|
|
* look at function cursor_delim_type_unicode() for complete
|
|
* list of special character, ctr -> */
|
|
while (*pos < str_maxlen) {
|
|
if (BLI_str_cursor_step_next_utf32(str, str_maxlen, pos)) {
|
|
if ((jump != STRCUR_JUMP_ALL) &&
|
|
(delim_type != cursor_delim_type_unicode(uint(str[*pos]))))
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
else {
|
|
break; /* unlikely but just in case */
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (direction == STRCUR_DIR_PREV) {
|
|
if (use_init_step) {
|
|
BLI_str_cursor_step_prev_utf32(str, str_maxlen, pos);
|
|
}
|
|
else {
|
|
BLI_assert(jump == STRCUR_JUMP_DELIM);
|
|
}
|
|
|
|
if (jump != STRCUR_JUMP_NONE) {
|
|
const eStrCursorDelimType delim_type = (*pos > 0) ?
|
|
cursor_delim_type_unicode(uint(str[(*pos) - 1])) :
|
|
STRCUR_DELIM_NONE;
|
|
/* jump between special characters (/,\,_,-, etc.),
|
|
* look at function cursor_delim_type() for complete
|
|
* list of special character, ctr -> */
|
|
while (*pos > 0) {
|
|
const int pos_prev = *pos;
|
|
if (BLI_str_cursor_step_prev_utf32(str, str_maxlen, pos)) {
|
|
if ((jump != STRCUR_JUMP_ALL) &&
|
|
(delim_type != cursor_delim_type_unicode(uint(str[*pos]))))
|
|
{
|
|
/* left only: compensate for index/change in direction */
|
|
if ((pos_orig - *pos) >= 1) {
|
|
*pos = pos_prev;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
BLI_assert_unreachable();
|
|
}
|
|
}
|
|
|
|
void BLI_str_cursor_step_bounds_utf8(
|
|
const char *str, const int str_maxlen, const int pos, int *r_start, int *r_end)
|
|
{
|
|
BLI_assert(str_maxlen >= 0);
|
|
BLI_assert(pos >= 0 && pos <= str_maxlen);
|
|
/* Identify the type of characters are on either side of the current cursor position. */
|
|
const eStrCursorDelimType prev = (pos > 0) ? cursor_delim_type_utf8(str, str_maxlen, pos - 1) :
|
|
STRCUR_DELIM_NONE;
|
|
const eStrCursorDelimType next = (pos < str_maxlen) ?
|
|
cursor_delim_type_utf8(str, str_maxlen, pos) :
|
|
STRCUR_DELIM_NONE;
|
|
*r_start = pos;
|
|
*r_end = pos;
|
|
|
|
if (prev != STRCUR_DELIM_NONE) {
|
|
if ((prev <= next) || (next == STRCUR_DELIM_NONE)) {
|
|
/* Expand backward if we are between similar content. */
|
|
BLI_str_cursor_step_utf8(
|
|
str, str_maxlen, r_start, STRCUR_DIR_PREV, STRCUR_JUMP_DELIM, false);
|
|
}
|
|
}
|
|
if (next != STRCUR_DELIM_NONE) {
|
|
if ((next <= prev) || (prev == STRCUR_DELIM_NONE)) {
|
|
/* Expand forward if we are between similar content. */
|
|
BLI_str_cursor_step_utf8(str, str_maxlen, r_end, STRCUR_DIR_NEXT, STRCUR_JUMP_DELIM, false);
|
|
}
|
|
}
|
|
}
|
|
|
|
void BLI_str_cursor_step_bounds_utf32(
|
|
const char32_t *str, const int str_maxlen, const int pos, int *r_start, int *r_end)
|
|
{
|
|
BLI_assert(str_maxlen >= 0);
|
|
BLI_assert(pos >= 0 && pos <= str_maxlen);
|
|
/* Identify the type of characters are on either side of the current cursor position. */
|
|
const eStrCursorDelimType prev = (pos > 0) ? cursor_delim_type_unicode(str[pos - 1]) :
|
|
STRCUR_DELIM_NONE;
|
|
const eStrCursorDelimType next = (pos < str_maxlen) ? cursor_delim_type_unicode(str[pos]) :
|
|
STRCUR_DELIM_NONE;
|
|
*r_start = pos;
|
|
*r_end = pos;
|
|
|
|
if (prev != STRCUR_DELIM_NONE) {
|
|
if ((prev <= next) || (next == STRCUR_DELIM_NONE)) {
|
|
/* Expand backward if we are between similar content. */
|
|
BLI_str_cursor_step_utf32(
|
|
str, str_maxlen, r_start, STRCUR_DIR_PREV, STRCUR_JUMP_DELIM, false);
|
|
}
|
|
}
|
|
if (next != STRCUR_DELIM_NONE) {
|
|
if ((next <= prev) || (prev == STRCUR_DELIM_NONE)) {
|
|
/* Expand forward if we are between similar content. */
|
|
BLI_str_cursor_step_utf32(str, str_maxlen, r_end, STRCUR_DIR_NEXT, STRCUR_JUMP_DELIM, false);
|
|
}
|
|
}
|
|
}
|