Listing the "Blender Foundation" as copyright holder implied the Blender Foundation holds copyright to files which may include work from many developers. While keeping copyright on headers makes sense for isolated libraries, Blender's own code may be refactored or moved between files in a way that makes the per-file copyright holders less meaningful. Copyright references to the "Blender Foundation" have been replaced with "Blender Authors", with the exception of `./extern/`, since this contains libraries which are more isolated; any changes to license headers there can be handled on a case-by-case basis. Some directories in `./intern/` have also been excluded: - `./intern/cycles/`: its own `AUTHORS` file is planned. - `./intern/opensubdiv/`. An "AUTHORS" file has been added, using the Chromium project's authors file as a template. Design task: #110784 Ref !110783.
303 lines
5.8 KiB
C
303 lines
5.8 KiB
C
/* SPDX-FileCopyrightText: 2012 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup intern_utf_conv
 */
|
|
|
|
#include "utfconv.h"
|
|
|
|
/**
 * Count the bytes needed to hold \a string16 converted to UTF-8, terminator included.
 *
 * Counting rules (one entry per UTF-16 unit unless noted):
 * - below 0x0080: 1 byte, below 0x0800: 2 bytes, other non-surrogate values: 3 bytes.
 * - high surrogate (0xD800..0xDBFF) followed by a low surrogate (0xDC00..0xDFFF): 4 bytes for
 *   the pair.
 * - high surrogate followed by anything else: both units are skipped and contribute nothing.
 * - high surrogate directly before the terminator: counting stops there.
 * - lone low surrogate: contributes nothing.
 *
 * \param string16: NUL-terminated wide string, may be NULL.
 * \return Number of bytes including the terminating NUL, or 0 when \a string16 is NULL.
 */
size_t count_utf_8_from_16(const wchar_t *string16)
{
  const wchar_t *cursor;
  size_t count = 0;
  wchar_t u;

  if (!string16) {
    return 0;
  }

  /* Walk with a pointer rather than an `int` index so arbitrarily long input cannot overflow
   * a signed counter. */
  for (cursor = string16; (u = *cursor) != 0; cursor++) {
    if (u < 0x0080) {
      count += 1;
    }
    else if (u < 0x0800) {
      count += 2;
    }
    else if (u < 0xD800) {
      count += 3;
    }
    else if (u < 0xDC00) {
      /* High surrogate: the next unit is consumed whether or not it completes the pair. */
      cursor++;
      u = *cursor;
      if (u == 0) {
        break;
      }
      if (u >= 0xDC00 && u < 0xE000) {
        count += 4;
      }
    }
    else if (u < 0xE000) {
      /* Illegal: low surrogate without a preceding high surrogate. */
    }
    else {
      count += 3;
    }
  }

  /* Room for the terminating NUL. */
  return count + 1;
}
|
|
|
|
/**
 * Count the UTF-16 units needed to hold \a string8 converted from UTF-8, terminator included.
 *
 * Bytes that cannot start a sequence are ignored. When a continuation byte is expected but a
 * different byte is found, the partial sequence and that byte are both discarded. Decoded values
 * that are zero, fall in 0xD800..0xDFFF, or are 0x110000 and above contribute nothing.
 *
 * \param string8: NUL-terminated byte string, may be NULL.
 * \return Number of UTF-16 units including the terminating NUL, or 0 when \a string8 is NULL.
 */
size_t count_utf_16_from_8(const char *string8)
{
  const unsigned char *cursor;
  size_t units = 0;
  unsigned int codepoint = 0;
  /* Continuation bytes still expected for the sequence being decoded. */
  int pending = 0;

  if (string8 == NULL) {
    return 0;
  }

  for (cursor = (const unsigned char *)string8; *cursor != 0; cursor++) {
    const unsigned int byte = *cursor;

    if (pending == 0) {
      if ((byte & 0x80) == 0) {
        /* Single byte (ASCII). */
        units++;
        codepoint = 0;
      }
      else if ((byte & 0xE0) == 0xC0) {
        /* Lead byte of a 2-byte sequence. */
        pending = 1;
        codepoint = byte & 0x1F;
      }
      else if ((byte & 0xF0) == 0xE0) {
        /* Lead byte of a 3-byte sequence. */
        pending = 2;
        codepoint = byte & 0x0F;
      }
      else if ((byte & 0xF8) == 0xF0) {
        /* Lead byte of a 4-byte sequence. */
        pending = 3;
        codepoint = byte & 0x07;
      }
      continue;
    }

    if ((byte & 0xC0) != 0x80) {
      /* Broken sequence: drop what was gathered along with this byte. */
      pending = 0;
      codepoint = 0;
      continue;
    }

    codepoint = (codepoint << 6) | (byte & 0x3F);
    pending--;
    if (pending != 0) {
      continue;
    }

    /* Sequence complete: decide how many UTF-16 units the value takes. */
    if (codepoint >= 0x10000) {
      if (codepoint < 0x110000) {
        units += 2;
      }
    }
    else if (codepoint != 0 && (codepoint < 0xD800 || codepoint >= 0xE000)) {
      units++;
    }
    codepoint = 0;
  }

  /* Room for the terminating NUL. */
  return units + 1;
}
|
|
|
|
int conv_utf_16_to_8(const wchar_t *in16, char *out8, size_t size8)
|
|
{
|
|
char *out8end = out8 + size8;
|
|
wchar_t u = 0;
|
|
int err = 0;
|
|
if (!size8 || !in16 || !out8) {
|
|
return UTF_ERROR_NULL_IN;
|
|
}
|
|
out8end--;
|
|
|
|
for (; out8 < out8end && (u = *in16); in16++, out8++) {
|
|
if (u < 0x0080) {
|
|
*out8 = u;
|
|
}
|
|
else if (u < 0x0800) {
|
|
if (out8 + 1 >= out8end) {
|
|
break;
|
|
}
|
|
*out8++ = (0x3 << 6) | (0x1F & (u >> 6));
|
|
*out8 = (0x1 << 7) | (0x3F & (u));
|
|
}
|
|
else if (u < 0xD800 || u >= 0xE000) {
|
|
if (out8 + 2 >= out8end) {
|
|
break;
|
|
}
|
|
*out8++ = (0x7 << 5) | (0xF & (u >> 12));
|
|
*out8++ = (0x1 << 7) | (0x3F & (u >> 6));
|
|
*out8 = (0x1 << 7) | (0x3F & (u));
|
|
}
|
|
else if (u < 0xDC00) {
|
|
wchar_t u2 = *++in16;
|
|
|
|
if (!u2) {
|
|
break;
|
|
}
|
|
if (u2 >= 0xDC00 && u2 < 0xE000) {
|
|
if (out8 + 3 >= out8end) {
|
|
break;
|
|
}
|
|
unsigned int uc = 0x10000 + (u2 - 0xDC00) + ((u - 0xD800) << 10);
|
|
|
|
*out8++ = (0xF << 4) | (0x7 & (uc >> 18));
|
|
*out8++ = (0x1 << 7) | (0x3F & (uc >> 12));
|
|
*out8++ = (0x1 << 7) | (0x3F & (uc >> 6));
|
|
*out8 = (0x1 << 7) | (0x3F & (uc));
|
|
}
|
|
else {
|
|
out8--;
|
|
err |= UTF_ERROR_ILLCHAR;
|
|
}
|
|
}
|
|
else if (u < 0xE000) {
|
|
out8--;
|
|
err |= UTF_ERROR_ILLCHAR;
|
|
}
|
|
}
|
|
|
|
*out8 = *out8end = 0;
|
|
|
|
if (*in16) {
|
|
err |= UTF_ERROR_SMALL;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
|
|
{
|
|
char u;
|
|
char type = 0;
|
|
unsigned int u32 = 0;
|
|
wchar_t *out16end = out16 + size16;
|
|
int err = 0;
|
|
if (!size16 || !in8 || !out16) {
|
|
return UTF_ERROR_NULL_IN;
|
|
}
|
|
out16end--;
|
|
|
|
for (; out16 < out16end && (u = *in8); in8++) {
|
|
if (type == 0) {
|
|
if ((u & 0x01 << 7) == 0) {
|
|
*out16 = u;
|
|
out16++;
|
|
u32 = 0;
|
|
continue;
|
|
} // 1 utf-8 char
|
|
if ((u & 0x07 << 5) == 0xC0) {
|
|
type = 1;
|
|
u32 = u & 0x1F;
|
|
continue;
|
|
} // 2 utf-8 char
|
|
if ((u & 0x0F << 4) == 0xE0) {
|
|
type = 2;
|
|
u32 = u & 0x0F;
|
|
continue;
|
|
} // 3 utf-8 char
|
|
if ((u & 0x1F << 3) == 0xF0) {
|
|
type = 3;
|
|
u32 = u & 0x07;
|
|
continue;
|
|
} // 4 utf-8 char
|
|
err |= UTF_ERROR_ILLCHAR;
|
|
continue;
|
|
}
|
|
if ((u & 0xC0) == 0x80) {
|
|
u32 = (u32 << 6) | (u & 0x3F);
|
|
type--;
|
|
}
|
|
else {
|
|
u32 = 0;
|
|
type = 0;
|
|
err |= UTF_ERROR_ILLSEQ;
|
|
}
|
|
|
|
if (type == 0) {
|
|
if ((0 < u32 && u32 < 0xD800) || (0xE000 <= u32 && u32 < 0x10000)) {
|
|
*out16 = u32;
|
|
out16++;
|
|
}
|
|
else if (0x10000 <= u32 && u32 < 0x110000) {
|
|
if (out16 + 1 >= out16end) {
|
|
break;
|
|
}
|
|
u32 -= 0x10000;
|
|
*out16 = 0xD800 + (u32 >> 10);
|
|
out16++;
|
|
*out16 = 0xDC00 + (u32 & 0x3FF);
|
|
out16++;
|
|
}
|
|
u32 = 0;
|
|
}
|
|
}
|
|
|
|
*out16 = *out16end = 0;
|
|
|
|
if (*in8) {
|
|
err |= UTF_ERROR_SMALL;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/* UNUSED FUNCTIONS */
#if 0
/** Return 1 when no byte of \a in8 has its high bit set, otherwise 0. */
static int is_ascii(const char *in8)
{
  while (*in8) {
    if (*in8 & 0x80) {
      return 0;
    }
    in8++;
  }

  return 1;
}

/**
 * Unfinished: only reads the byte at \a maxcutpoint, nothing is modified yet.
 */
static void utf_8_cut_end(char *inout8, size_t maxcutpoint)
{
  char *cur = inout8 + maxcutpoint;
  char cc;

  if (!inout8) {
    return;
  }

  cc = *cur;
}
#endif
|
|
|
|
/**
 * Allocate a buffer with `malloc` and fill it with the UTF-8 conversion of \a in16.
 *
 * The buffer holds the converted string (sized by #count_utf_8_from_16) plus \a add extra
 * bytes after it; only the converted string is written. The caller owns the result and
 * releases it with `free`.
 *
 * \param in16: NUL-terminated wide string, may be NULL.
 * \param add: Number of additional bytes to reserve beyond the converted string.
 * \return The new buffer, or NULL when \a in16 is NULL, the requested size overflows, or the
 * allocation fails.
 */
char *alloc_utf_8_from_16(const wchar_t *in16, size_t add)
{
  const size_t bsize = count_utf_8_from_16(in16);
  char *out8;

  if (!bsize) {
    return NULL;
  }
  /* Reject sizes that would wrap around and yield a buffer smaller than `bsize`. */
  if (add > (size_t)-1 - bsize) {
    return NULL;
  }

  out8 = (char *)malloc(bsize + add);
  if (!out8) {
    return NULL;
  }

  conv_utf_16_to_8(in16, out8, bsize);
  return out8;
}
|
|
|
|
/**
 * Allocate a buffer with `malloc` and fill it with the UTF-16 conversion of \a in8.
 *
 * The buffer holds the converted string (sized by #count_utf_16_from_8) plus \a add extra
 * `wchar_t` units after it; only the converted string is written. The caller owns the result
 * and releases it with `free`.
 *
 * \param in8: NUL-terminated UTF-8 string, may be NULL.
 * \param add: Number of additional `wchar_t` units to reserve beyond the converted string.
 * \return The new buffer, or NULL when \a in8 is NULL, the requested size overflows, or the
 * allocation fails.
 */
wchar_t *alloc_utf16_from_8(const char *in8, size_t add)
{
  const size_t bsize = count_utf_16_from_8(in8);
  /* Largest element count whose byte size still fits in `size_t`. */
  const size_t max_units = (size_t)-1 / sizeof(wchar_t);
  wchar_t *out16;

  if (!bsize) {
    return NULL;
  }
  /* Reject sizes that would wrap around and yield a buffer smaller than `bsize` units. */
  if (bsize > max_units || add > max_units - bsize) {
    return NULL;
  }

  out16 = (wchar_t *)malloc(sizeof(wchar_t) * (bsize + add));
  if (!out16) {
    return NULL;
  }

  conv_utf_8_to_16(in8, out16, bsize);
  return out16;
}
|