Files
test2/intern/utfconv/utfconv.c
Campbell Barton e955c94ed3 License Headers: Set copyright to "Blender Authors", add AUTHORS
Listing the "Blender Foundation" as copyright holder implied the Blender
Foundation holds copyright to files which may include work from many
developers.

While keeping copyright on headers makes sense for isolated libraries,
Blender's own code may be refactored or moved between files in a way
that makes the per file copyright holders less meaningful.

Copyright references to the "Blender Foundation" have been replaced with
"Blender Authors", with the exception of `./extern/`, since this
contains libraries which are more isolated; any changes to license
headers there can be handled on a case-by-case basis.

Some directories in `./intern/` have also been excluded:

- `./intern/cycles/` its own `AUTHORS` file is planned.
- `./intern/opensubdiv/`.

An "AUTHORS" file has been added, using the Chromium project's authors
file as a template.

Design task: #110784

Ref !110783.
2023-08-16 00:20:26 +10:00

303 lines
5.8 KiB
C

/* SPDX-FileCopyrightText: 2012 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup intern_utf_conv
*/
#include "utfconv.h"
/**
 * Compute how many bytes are needed to store \a string16 as UTF-8.
 *
 * \param string16: Null-terminated UTF-16 string, may be NULL.
 * \return Byte count including the null terminator, or 0 when \a string16 is NULL.
 */
size_t count_utf_8_from_16(const wchar_t *string16)
{
  if (!string16) {
    return 0;
  }

  size_t bytes = 0;
  const wchar_t *cursor = string16;

  while (*cursor != 0) {
    const wchar_t unit = *cursor;

    if (unit < 0x0080) {
      bytes += 1;
    }
    else if (unit < 0x0800) {
      bytes += 2;
    }
    else if (unit < 0xD800 || unit >= 0xE000) {
      /* Everything outside the surrogate range takes three bytes. */
      bytes += 3;
    }
    else if (unit < 0xDC00) {
      /* High surrogate: the unit after it is consumed whatever it turns out to be. */
      const wchar_t trail = *++cursor;
      if (trail == 0) {
        /* String ended in the middle of a pair; stop on the terminator. */
        break;
      }
      if (trail >= 0xDC00 && trail < 0xE000) {
        bytes += 4;
      }
      /* Otherwise the pair is malformed and contributes nothing. */
    }
    /* Remaining case is a lone low surrogate: illegal, contributes nothing. */

    cursor++;
  }

  /* One extra byte for the null terminator. */
  return bytes + 1;
}
/**
 * Compute how many `wchar_t` units are needed to store \a string8 as UTF-16.
 *
 * Invalid lead bytes, interrupted sequences and decoded values that cannot be
 * encoded (zero, the surrogate range, anything from 0x110000 up) add nothing.
 *
 * \param string8: Null-terminated UTF-8 string, may be NULL.
 * \return Unit count including the null terminator, or 0 when \a string8 is NULL.
 */
size_t count_utf_16_from_8(const char *string8)
{
  if (!string8) {
    return 0;
  }

  size_t units = 0;
  unsigned int codepoint = 0;
  /* Number of continuation bytes still expected for the current sequence. */
  int pending = 0;

  for (const char *p = string8; *p; p++) {
    const unsigned int byte = (unsigned char)*p;

    if (pending == 0) {
      if ((byte & 0x80) == 0) {
        /* Single byte (ASCII). */
        units++;
      }
      else if ((byte & 0xE0) == 0xC0) {
        /* Lead byte of a 2-byte sequence. */
        pending = 1;
        codepoint = byte & 0x1F;
      }
      else if ((byte & 0xF0) == 0xE0) {
        /* Lead byte of a 3-byte sequence. */
        pending = 2;
        codepoint = byte & 0x0F;
      }
      else if ((byte & 0xF8) == 0xF0) {
        /* Lead byte of a 4-byte sequence. */
        pending = 3;
        codepoint = byte & 0x07;
      }
      /* Any other byte is not a valid lead byte and is skipped. */
      continue;
    }

    if ((byte & 0xC0) != 0x80) {
      /* Sequence interrupted: discard it together with the interrupting byte. */
      pending = 0;
      codepoint = 0;
      continue;
    }

    codepoint = (codepoint << 6) | (byte & 0x3F);
    pending--;
    if (pending != 0) {
      continue;
    }

    /* Sequence complete: one unit for the BMP, two for a surrogate pair. */
    if ((0 < codepoint && codepoint < 0xD800) || (0xE000 <= codepoint && codepoint < 0x10000)) {
      units += 1;
    }
    else if (0x10000 <= codepoint && codepoint < 0x110000) {
      units += 2;
    }
    codepoint = 0;
  }

  /* One extra unit for the null terminator. */
  return units + 1;
}
/**
 * Convert a null-terminated UTF-16 string to UTF-8.
 *
 * The output is always null-terminated. A character is only written when all
 * of its bytes fit in front of the terminator, so the result never ends in a
 * partial sequence.
 *
 * The unit following a high surrogate is always consumed, even when it is not
 * a low surrogate (this matches what #count_utf_8_from_16 counts).
 *
 * NOTE(review): values above 0xFFFF (possible where `wchar_t` is wider than
 * 16 bits) take the 3-byte branch, so their upper bits are lost.
 *
 * \param in16: Null-terminated UTF-16 input.
 * \param out8: Destination buffer.
 * \param size8: Size of \a out8 in bytes, including room for the terminator.
 * \return 0 on success, otherwise a combination of:
 * - #UTF_ERROR_NULL_IN: \a size8 is zero or a pointer is NULL (nothing is written).
 * - #UTF_ERROR_ILLCHAR: an unpaired surrogate was skipped.
 * - #UTF_ERROR_SMALL: input remained that did not fit in \a out8.
 */
int conv_utf_16_to_8(const wchar_t *in16, char *out8, size_t size8)
{
  if (!size8 || !in16 || !out8) {
    return UTF_ERROR_NULL_IN;
  }

  /* Last slot of the buffer, reserved for the terminator. Computed only after
   * the NULL check so no arithmetic is ever done on a NULL pointer. */
  char *const out8end = out8 + (size8 - 1);
  int err = 0;
  wchar_t u = 0;

  /* `out8` is advanced explicitly after each write, so it never has to be
   * stepped back (possibly before the start of the buffer) for skipped units. */
  for (; out8 < out8end && (u = *in16); in16++) {
    /* Bytes available in front of the terminator slot, always >= 1 here. */
    const size_t room = (size_t)(out8end - out8);

    if (u < 0x0080) {
      *out8++ = (char)u;
    }
    else if (u < 0x0800) {
      if (room < 2) {
        break;
      }
      *out8++ = (char)(0xC0 | (0x1F & (u >> 6)));
      *out8++ = (char)(0x80 | (0x3F & u));
    }
    else if (u < 0xD800 || u >= 0xE000) {
      if (room < 3) {
        break;
      }
      *out8++ = (char)(0xE0 | (0x0F & (u >> 12)));
      *out8++ = (char)(0x80 | (0x3F & (u >> 6)));
      *out8++ = (char)(0x80 | (0x3F & u));
    }
    else if (u < 0xDC00) {
      /* High surrogate: a low surrogate must follow. */
      const wchar_t u2 = *++in16;

      if (!u2) {
        /* String ended in the middle of a pair: report it instead of dropping
         * the high surrogate silently. `in16` is left on the terminator. */
        err |= UTF_ERROR_ILLCHAR;
        break;
      }
      if (u2 >= 0xDC00 && u2 < 0xE000) {
        if (room < 4) {
          break;
        }
        const unsigned int uc = 0x10000 + (u2 - 0xDC00) + ((u - 0xD800) << 10);

        *out8++ = (char)(0xF0 | (0x07 & (uc >> 18)));
        *out8++ = (char)(0x80 | (0x3F & (uc >> 12)));
        *out8++ = (char)(0x80 | (0x3F & (uc >> 6)));
        *out8++ = (char)(0x80 | (0x3F & uc));
      }
      else {
        /* High surrogate without a matching low surrogate. */
        err |= UTF_ERROR_ILLCHAR;
      }
    }
    else {
      /* Low surrogate without a preceding high surrogate. */
      err |= UTF_ERROR_ILLCHAR;
    }
  }

  /* Terminate at the write position and at the very end of the buffer. */
  *out8 = *out8end = 0;

  if (*in16) {
    err |= UTF_ERROR_SMALL;
  }

  return err;
}
/**
 * Convert a null-terminated UTF-8 string to UTF-16.
 *
 * The output is always null-terminated. A surrogate pair is only written when
 * both units fit in front of the terminator.
 *
 * \param in8: Null-terminated UTF-8 input.
 * \param out16: Destination buffer.
 * \param size16: Size of \a out16 in `wchar_t` units, including room for the terminator.
 * \return 0 on success, otherwise a combination of:
 * - #UTF_ERROR_NULL_IN: \a size16 is zero or a pointer is NULL (nothing is written).
 * - #UTF_ERROR_ILLCHAR: an invalid lead byte was skipped, or a sequence decoded
 *   to a value that cannot be encoded (zero, a surrogate, 0x110000 or above).
 * - #UTF_ERROR_ILLSEQ: a multi-byte sequence was interrupted by a
 *   non-continuation byte or by the end of the input.
 * - #UTF_ERROR_SMALL: input remained that did not fit in \a out16.
 */
int conv_utf_8_to_16(const char *in8, wchar_t *out16, size_t size16)
{
  if (!size16 || !in8 || !out16) {
    return UTF_ERROR_NULL_IN;
  }

  /* Last slot of the buffer, reserved for the terminator. Computed only after
   * the NULL check so no arithmetic is ever done on a NULL pointer. */
  wchar_t *const out16end = out16 + (size16 - 1);
  unsigned int u32 = 0;
  /* Number of continuation bytes still expected for the current sequence. */
  char type = 0;
  char u;
  int err = 0;

  for (; out16 < out16end && (u = *in8); in8++) {
    if (type == 0) {
      if ((u & 0x80) == 0) {
        /* Single byte (ASCII). */
        *out16++ = (wchar_t)u;
      }
      else if ((u & 0xE0) == 0xC0) {
        /* Lead byte of a 2-byte sequence. */
        type = 1;
        u32 = u & 0x1F;
      }
      else if ((u & 0xF0) == 0xE0) {
        /* Lead byte of a 3-byte sequence. */
        type = 2;
        u32 = u & 0x0F;
      }
      else if ((u & 0xF8) == 0xF0) {
        /* Lead byte of a 4-byte sequence. */
        type = 3;
        u32 = u & 0x07;
      }
      else {
        /* Stray continuation byte or unsupported lead byte. */
        err |= UTF_ERROR_ILLCHAR;
      }
      continue;
    }

    if ((u & 0xC0) != 0x80) {
      /* Sequence interrupted: discard it together with the interrupting byte. */
      u32 = 0;
      type = 0;
      err |= UTF_ERROR_ILLSEQ;
      continue;
    }

    u32 = (u32 << 6) | (u & 0x3F);
    type--;
    if (type != 0) {
      continue;
    }

    /* Sequence complete. */
    if ((0 < u32 && u32 < 0xD800) || (0xE000 <= u32 && u32 < 0x10000)) {
      *out16++ = (wchar_t)u32;
    }
    else if (0x10000 <= u32 && u32 < 0x110000) {
      /* Needs a surrogate pair; `in8` stays on this sequence's last byte so
       * the truncation is reported below. */
      if (out16 + 1 >= out16end) {
        break;
      }
      u32 -= 0x10000;
      *out16++ = (wchar_t)(0xD800 + (u32 >> 10));
      *out16++ = (wchar_t)(0xDC00 + (u32 & 0x3FF));
    }
    else {
      /* Decoded value is not encodable: dropped, but no longer silently. */
      err |= UTF_ERROR_ILLCHAR;
    }
    u32 = 0;
  }

  /* Terminate at the write position and at the very end of the buffer. */
  *out16 = *out16end = 0;

  if (*in8) {
    err |= UTF_ERROR_SMALL;
  }
  if (type != 0) {
    /* Input ended while continuation bytes were still expected. */
    err |= UTF_ERROR_ILLSEQ;
  }

  return err;
}
/* UNUSED FUNCTIONS */
/* Everything between `#if 0` and `#endif` is excluded from compilation. */
#if 0
/* Return 1 when no byte of the null-terminated string `in8` has its high bit (0x80) set,
 * otherwise 0. `in8` is dereferenced without a NULL check. */
static int is_ascii(const char *in8)
{
for (; *in8; in8++)
if (0x80 & *in8)
return 0;
return 1;
}
/* NOTE(review): looks like an unfinished stub. It reads the byte at
 * `inout8 + maxcutpoint` into `cc` and then does nothing with it. The pointer
 * offset is also computed before the NULL check on `inout8`. */
static void utf_8_cut_end(char *inout8, size_t maxcutpoint)
{
char *cur = inout8 + maxcutpoint;
char cc;
if (!inout8)
return;
cc = *cur;
}
#endif
/**
 * Allocate a new UTF-8 string converted from \a in16.
 *
 * \param in16: Null-terminated UTF-16 input, may be NULL.
 * \param add: Number of extra bytes to allocate after the converted string;
 * they are left uninitialized.
 * \return Buffer obtained from `malloc` (release it with `free`), or NULL when
 * \a in16 is NULL, the requested size overflows, or the allocation fails.
 */
char *alloc_utf_8_from_16(const wchar_t *in16, size_t add)
{
  /* Bytes needed for the converted string, terminator included (0 for NULL input). */
  const size_t bsize = count_utf_8_from_16(in16);
  if (!bsize) {
    return NULL;
  }

  /* Unsigned addition wraps on overflow; refuse rather than under-allocate. */
  const size_t total = bsize + add;
  if (total < bsize) {
    return NULL;
  }

  char *out8 = malloc(total);
  if (!out8) {
    return NULL;
  }

  /* Only the first `bsize` bytes are handed to the converter. */
  conv_utf_16_to_8(in16, out8, bsize);
  return out8;
}
/**
 * Allocate a new UTF-16 string converted from \a in8.
 *
 * \param in8: Null-terminated UTF-8 input, may be NULL.
 * \param add: Number of extra `wchar_t` units to allocate after the converted
 * string; they are left uninitialized.
 * \return Buffer obtained from `malloc` (release it with `free`), or NULL when
 * \a in8 is NULL, the requested size overflows, or the allocation fails.
 */
wchar_t *alloc_utf16_from_8(const char *in8, size_t add)
{
  /* Units needed for the converted string, terminator included (0 for NULL input). */
  const size_t bsize = count_utf_16_from_8(in8);
  if (!bsize) {
    return NULL;
  }

  /* Refuse requests whose unit count or byte size would wrap around `size_t`. */
  const size_t total = bsize + add;
  if (total < bsize || total > (size_t)-1 / sizeof(wchar_t)) {
    return NULL;
  }

  wchar_t *out16 = malloc(sizeof(*out16) * total);
  if (!out16) {
    return NULL;
  }

  /* Only the first `bsize` units are handed to the converter. */
  conv_utf_8_to_16(in8, out16, bsize);
  return out16;
}