Files
test/source/blender/blenlib/intern/string_cursor_utf8.c
Campbell Barton e955c94ed3 License Headers: Set copyright to "Blender Authors", add AUTHORS
Listing the "Blender Foundation" as copyright holder implied the Blender
Foundation holds copyright to files which may include work from many
developers.

While keeping copyright on headers makes sense for isolated libraries,
Blender's own code may be refactored or moved between files in a way
that makes the per file copyright holders less meaningful.

Copyright references to the "Blender Foundation" have been replaced with
"Blender Authors", with the exception of `./extern/` since these this
contains libraries which are more isolated, any changed to license
headers there can be handled on a case-by-case basis.

Some directories in `./intern/` have also been excluded:

- `./intern/cycles/` it's own `AUTHORS` file is planned.
- `./intern/opensubdiv/`.

An "AUTHORS" file has been added, using the chromium projects authors
file as a template.

Design task: #110784

Ref !110783.
2023-08-16 00:20:26 +10:00

373 lines
11 KiB
C

/* SPDX-FileCopyrightText: 2011 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup bli
*/
#include <stdio.h>
#include <stdlib.h>
#include "BLI_string_utf8.h"
#include "BLI_utildefines.h"
#include "BLI_string_cursor_utf8.h" /* own include */
#ifdef __GNUC__
# pragma GCC diagnostic error "-Wsign-conversion"
#endif
/**
* The category of character as returned by #cursor_delim_type_unicode.
*
* \note Don't compare with any values besides #STRCUR_DELIM_NONE as cursor motion
* should only delimit on changes, not treat some groups differently.
*
* For range calculation the order prioritizes expansion direction,
* when the cursor is between two different categories, "hug" the smaller values.
* Where white-space gets lowest priority. See #BLI_str_cursor_step_bounds_utf8.
* This is done so expanding the range at a word boundary always chooses the word instead
* of the white-space before or after it.
*/
typedef enum eStrCursorDelimType {
STRCUR_DELIM_NONE,
STRCUR_DELIM_ALPHANUMERIC,
STRCUR_DELIM_PUNCT,
STRCUR_DELIM_BRACE,
STRCUR_DELIM_OPERATOR,
STRCUR_DELIM_QUOTE,
STRCUR_DELIM_OTHER,
STRCUR_DELIM_WHITESPACE,
} eStrCursorDelimType;
static eStrCursorDelimType cursor_delim_type_unicode(const uint uch)
{
switch (uch) {
case ',':
case '.':
return STRCUR_DELIM_PUNCT;
case '{':
case '}':
case '[':
case ']':
case '(':
case ')':
return STRCUR_DELIM_BRACE;
case '+':
case '-':
case '=':
case '~':
case '%':
case '/':
case '<':
case '>':
case '^':
case '*':
case '&':
case '|':
return STRCUR_DELIM_OPERATOR;
case '\'':
case '\"':
return STRCUR_DELIM_QUOTE;
case ' ':
case '\t':
case '\n':
return STRCUR_DELIM_WHITESPACE;
case '\\':
case '@':
case '#':
case '$':
case ':':
case ';':
case '?':
case '!':
case 0xA3: /* pound */
case 0x80: /* euro */
/* case '_': */ /* special case, for python */
return STRCUR_DELIM_OTHER;
default:
break;
}
return STRCUR_DELIM_ALPHANUMERIC; /* Not quite true, but ok for now */
}
static eStrCursorDelimType cursor_delim_type_utf8(const char *ch_utf8,
const size_t ch_utf8_len,
const int pos)
{
/* for full unicode support we really need to have large lookup tables to figure
* out what's what in every possible char set - and python, glib both have these. */
size_t index = (size_t)pos;
uint uch = BLI_str_utf8_as_unicode_step_or_error(ch_utf8, ch_utf8_len, &index);
return cursor_delim_type_unicode(uch);
}
bool BLI_str_cursor_step_next_utf8(const char *str, size_t str_maxlen, int *pos)
{
/* NOTE: Keep in sync with #BLI_str_cursor_step_next_utf32. */
if ((*pos) >= (int)str_maxlen) {
return false;
}
const char *str_end = str + (str_maxlen + 1);
const char *str_pos = str + (*pos);
const char *str_next = str_pos;
do {
str_next = BLI_str_find_next_char_utf8(str_next, str_end);
} while (str_next < str_end && str_next[0] != 0 && BLI_str_utf8_char_width(str_next) < 1);
(*pos) += (str_next - str_pos);
if ((*pos) > (int)str_maxlen) {
(*pos) = (int)str_maxlen;
}
return true;
}
bool BLI_str_cursor_step_prev_utf8(const char *str, size_t str_maxlen, int *pos)
{
/* NOTE: Keep in sync with #BLI_str_cursor_step_prev_utf32. */
if ((*pos) > 0 && (*pos) <= str_maxlen) {
const char *str_pos = str + (*pos);
const char *str_prev = str_pos;
do {
str_prev = BLI_str_find_prev_char_utf8(str_prev, str);
} while (str_prev > str && BLI_str_utf8_char_width(str_prev) == 0);
(*pos) -= (str_pos - str_prev);
return true;
}
return false;
}
void BLI_str_cursor_step_utf8(const char *str,
size_t str_maxlen,
int *pos,
eStrCursorJumpDirection direction,
eStrCursorJumpType jump,
bool use_init_step)
{
const int pos_orig = *pos;
if (direction == STRCUR_DIR_NEXT) {
if (use_init_step) {
BLI_str_cursor_step_next_utf8(str, str_maxlen, pos);
}
else {
BLI_assert(jump == STRCUR_JUMP_DELIM);
}
if (jump != STRCUR_JUMP_NONE) {
const eStrCursorDelimType delim_type = (*pos) < str_maxlen ?
cursor_delim_type_utf8(str, str_maxlen, *pos) :
STRCUR_DELIM_NONE;
/* jump between special characters (/,\,_,-, etc.),
* look at function cursor_delim_type() for complete
* list of special character, ctr -> */
while ((*pos) < str_maxlen) {
if (BLI_str_cursor_step_next_utf8(str, str_maxlen, pos)) {
if (*pos == str_maxlen) {
break;
}
if ((jump != STRCUR_JUMP_ALL) &&
(delim_type != cursor_delim_type_utf8(str, str_maxlen, *pos))) {
break;
}
}
else {
break; /* unlikely but just in case */
}
}
}
}
else if (direction == STRCUR_DIR_PREV) {
if (use_init_step) {
BLI_str_cursor_step_prev_utf8(str, str_maxlen, pos);
}
else {
BLI_assert(jump == STRCUR_JUMP_DELIM);
}
if (jump != STRCUR_JUMP_NONE) {
const eStrCursorDelimType delim_type = (*pos) > 0 ? cursor_delim_type_utf8(
str, str_maxlen, *pos - 1) :
STRCUR_DELIM_NONE;
/* jump between special characters (/,\,_,-, etc.),
* look at function cursor_delim_type() for complete
* list of special character, ctr -> */
while ((*pos) > 0) {
const int pos_prev = *pos;
if (BLI_str_cursor_step_prev_utf8(str, str_maxlen, pos)) {
if ((jump != STRCUR_JUMP_ALL) &&
(delim_type != cursor_delim_type_utf8(str, str_maxlen, *pos))) {
/* left only: compensate for index/change in direction */
if ((pos_orig - (*pos)) >= 1) {
*pos = pos_prev;
}
break;
}
}
else {
break;
}
}
}
}
else {
BLI_assert_unreachable();
}
}
bool BLI_str_cursor_step_next_utf32(const char32_t *str, size_t str_maxlen, int *pos)
{
/* NOTE: Keep in sync with #BLI_str_cursor_step_next_utf8. */
if ((*pos) >= (int)str_maxlen) {
return false;
}
do {
(*pos)++;
} while (*pos < (int)str_maxlen && str[*pos] != 0 && BLI_wcwidth(str[*pos]) == 0);
return true;
}
bool BLI_str_cursor_step_prev_utf32(const char32_t *str, size_t UNUSED(str_maxlen), int *pos)
{
/* NOTE: Keep in sync with #BLI_str_cursor_step_prev_utf8. */
if ((*pos) <= 0) {
return false;
}
do {
(*pos)--;
} while (*pos > 0 && BLI_wcwidth(str[*pos]) == 0);
return true;
}
void BLI_str_cursor_step_utf32(const char32_t *str,
size_t str_maxlen,
int *pos,
eStrCursorJumpDirection direction,
eStrCursorJumpType jump,
bool use_init_step)
{
const int pos_orig = *pos;
if (direction == STRCUR_DIR_NEXT) {
if (use_init_step) {
BLI_str_cursor_step_next_utf32(str, str_maxlen, pos);
}
else {
BLI_assert(jump == STRCUR_JUMP_DELIM);
}
if (jump != STRCUR_JUMP_NONE) {
const eStrCursorDelimType delim_type = (*pos) < str_maxlen ?
cursor_delim_type_unicode((uint)str[*pos]) :
STRCUR_DELIM_NONE;
/* jump between special characters (/,\,_,-, etc.),
* look at function cursor_delim_type_unicode() for complete
* list of special character, ctr -> */
while ((*pos) < str_maxlen) {
if (BLI_str_cursor_step_next_utf32(str, str_maxlen, pos)) {
if ((jump != STRCUR_JUMP_ALL) &&
(delim_type != cursor_delim_type_unicode((uint)str[*pos]))) {
break;
}
}
else {
break; /* unlikely but just in case */
}
}
}
}
else if (direction == STRCUR_DIR_PREV) {
if (use_init_step) {
BLI_str_cursor_step_prev_utf32(str, str_maxlen, pos);
}
else {
BLI_assert(jump == STRCUR_JUMP_DELIM);
}
if (jump != STRCUR_JUMP_NONE) {
const eStrCursorDelimType delim_type = (*pos) > 0 ?
cursor_delim_type_unicode((uint)str[(*pos) - 1]) :
STRCUR_DELIM_NONE;
/* jump between special characters (/,\,_,-, etc.),
* look at function cursor_delim_type() for complete
* list of special character, ctr -> */
while ((*pos) > 0) {
const int pos_prev = *pos;
if (BLI_str_cursor_step_prev_utf32(str, str_maxlen, pos)) {
if ((jump != STRCUR_JUMP_ALL) &&
(delim_type != cursor_delim_type_unicode((uint)str[*pos]))) {
/* left only: compensate for index/change in direction */
if ((pos_orig - (*pos)) >= 1) {
*pos = pos_prev;
}
break;
}
}
else {
break;
}
}
}
}
else {
BLI_assert_unreachable();
}
}
void BLI_str_cursor_step_bounds_utf8(
const char *str, const size_t str_maxlen, const int pos, int *r_start, int *r_end)
{
/* Identify the type of characters are on either side of the current cursor position. */
const eStrCursorDelimType prev = (pos > 0) ? cursor_delim_type_utf8(str, str_maxlen, pos - 1) :
STRCUR_DELIM_NONE;
const eStrCursorDelimType next = (pos < str_maxlen) ?
cursor_delim_type_utf8(str, str_maxlen, pos) :
STRCUR_DELIM_NONE;
*r_start = pos;
*r_end = pos;
if ((prev <= next) && (prev != STRCUR_DELIM_NONE)) {
/* Expand backward if we are between similar content. */
BLI_str_cursor_step_utf8(str, str_maxlen, r_start, STRCUR_DIR_PREV, STRCUR_JUMP_DELIM, false);
}
if ((prev >= next) && (next != STRCUR_DELIM_NONE)) {
/* Expand forward if we are between similar content. */
BLI_str_cursor_step_utf8(str, str_maxlen, r_end, STRCUR_DIR_NEXT, STRCUR_JUMP_DELIM, false);
}
}
void BLI_str_cursor_step_bounds_utf32(
const char32_t *str, const size_t str_maxlen, const int pos, int *r_start, int *r_end)
{
/* Identify the type of characters are on either side of the current cursor position. */
const eStrCursorDelimType prev = (pos > 0) ? cursor_delim_type_unicode(str[pos - 1]) :
STRCUR_DELIM_NONE;
const eStrCursorDelimType next = (pos < str_maxlen) ? cursor_delim_type_unicode(str[pos]) :
STRCUR_DELIM_NONE;
*r_start = pos;
*r_end = pos;
if ((prev <= next) && (prev != STRCUR_DELIM_NONE)) {
/* Expand backward if we are between similar content. */
BLI_str_cursor_step_utf32(str, str_maxlen, r_start, STRCUR_DIR_PREV, STRCUR_JUMP_DELIM, false);
}
if ((prev >= next) && (next != STRCUR_DELIM_NONE)) {
/* Expand forward if we are between similar content. */
BLI_str_cursor_step_utf32(str, str_maxlen, r_end, STRCUR_DIR_NEXT, STRCUR_JUMP_DELIM, false);
}
}