Files
test2/intern/locale/messages.cpp
Brecht Van Lommel 820f261371 Refactor: Replace Boost Locale for translation
Use code adapted from boost::locale to implement locale name parsing
and .mo file reading ourselves.

Together with #132142 this removes the last direct Blender dependency
on Boost.

Pull Request: https://projects.blender.org/blender/blender/pulls/133347
2025-01-22 09:00:32 +01:00

555 lines
13 KiB
C++

/* SPDX-FileCopyrightText: 2009-2015 Artyom Beilis (Tonkikh)
* SPDX-FileCopyrightText: 2021-2023 Alexander Grund
* SPDX-FileCopyrightText: 2025 Blender Authors
* SPDX-License-Identifier: BSL-1.0
*
* Adapted from boost::locale */
#include "messages.h"
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include "BLI_assert.h"
#include "BLI_fileops.h"
#include "BLI_hash.hh"
#include "BLI_map.hh"
#include "BLI_path_utils.hh"
#include "BLI_string_ref.hh"
#include "BLI_vector.hh"
#ifdef _WIN32
# include "BLI_winstuff.h"
#endif
namespace blender::locale {
/* Upper/lower case, intentionally restricted to ASCII. */
/** True when `c` lies in the ASCII range `A`-`Z`; any other byte yields false. */
static constexpr bool is_upper_ascii(const char c)
{
  return c >= 'A' && c <= 'Z';
}
/** True when `c` lies in the ASCII range `a`-`z`; any other byte yields false. */
static constexpr bool is_lower_ascii(const char c)
{
  return c >= 'a' && c <= 'z';
}
/**
 * Convert `c` to lower case in place when it is an ASCII capital letter.
 *
 * \return true when `c` was changed, false when it was left untouched.
 */
static bool make_lower_ascii(char &c)
{
  if (!is_upper_ascii(c)) {
    return false;
  }
  /* Upper and lower case letters are a fixed distance apart in ASCII. */
  c = static_cast<char>(c + ('a' - 'A'));
  return true;
}
/**
 * Convert `c` to upper case in place when it is an ASCII small letter.
 *
 * \return true when `c` was changed, false when it was left untouched.
 */
static bool make_upper_ascii(char &c)
{
  if (!is_lower_ascii(c)) {
    return false;
  }
  /* Upper and lower case letters are a fixed distance apart in ASCII. */
  c = static_cast<char>(c + ('A' - 'a'));
  return true;
}
/** True when `c` is one of the ASCII decimal digits `0`-`9`. */
static constexpr bool is_numeric_ascii(const char c)
{
  return c >= '0' && c <= '9';
}
/* Locale parsing. */
/**
 * Final parsing step: store the variant (the text after `@`) in `info`.
 *
 * \return false when the language is "C" or when `input` is empty, in which case `info` is left
 * unchanged. Otherwise the variant is stored in lower case and true is returned.
 */
static bool parse_from_variant(Info &info, const std::string_view input)
{
  const bool is_c_locale = (info.language == "C");
  if (is_c_locale || input.empty()) {
    return false;
  }

  info.variant = input;
  /* Nothing is assumed about the contents, the variant is only normalized to lower case. */
  std::for_each(info.variant.begin(), info.variant.end(), [](char &c) { make_lower_ascii(c); });
  return true;
}
/**
 * Parse the encoding part of a locale name (the text after `.`), optionally followed by
 * `@variant`.
 *
 * The encoding itself is not stored anywhere, it only has to be present.
 *
 * \return false when the encoding is empty or when parsing the variant fails.
 */
static bool parse_from_encoding(Info &info, const std::string_view input)
{
  const size_t at_pos = input.find('@');
  const std::string_view encoding = input.substr(0, at_pos);
  if (encoding.empty()) {
    return false;
  }

  /* The encoding is deliberately ignored. Without a `@` there is nothing left to parse. */
  if (at_pos == std::string_view::npos) {
    return true;
  }

  BLI_assert(input[at_pos] == '@');
  return parse_from_variant(info, input.substr(at_pos + 1));
}
/**
 * Parse the country part of a locale name, optionally followed by `.encoding` and/or
 * `@variant`.
 *
 * Accepted countries are a run of ASCII letters (stored in upper case), a three digit code, or
 * the special `US_POSIX` after the language `en`, which turns the language into "C" and leaves
 * the country empty.
 *
 * \return false when the language is "C", when the country is empty or malformed, or when a
 * later part fails to parse.
 */
static bool parse_from_country(Info &info, const std::string_view input)
{
  if (info.language == "C") {
    return false;
  }

  const size_t sep_pos = input.find_first_of("@.");
  std::string country(input.substr(0, sep_pos));
  if (country.empty()) {
    return false;
  }
  std::for_each(country.begin(), country.end(), [](char &c) { make_upper_ascii(c); });

  /* Letters only: assume an ISO 3166 country id. Anything else has to be a special case. */
  const bool only_letters = std::all_of(country.begin(), country.end(), is_upper_ascii);
  if (!only_letters) {
    if (info.language == "en" && country == "US_POSIX") {
      /* `en_US_POSIX` is an alias for the "C" locale. */
      info.language = "C";
      country.clear();
    }
    else {
      /* M49 country code: exactly three digits. */
      const bool is_m49 = country.size() == 3u &&
                          std::all_of(country.begin(), country.end(), is_numeric_ascii);
      if (!is_m49) {
        return false;
      }
    }
  }
  info.country = country;

  if (sep_pos == std::string_view::npos) {
    return true;
  }

  const std::string_view rest = input.substr(sep_pos + 1);
  if (input[sep_pos] == '.') {
    return parse_from_encoding(info, rest);
  }
  BLI_assert(input[sep_pos] == '@');
  return parse_from_variant(info, rest);
}
/**
 * Parse the optional script part of a locale name and whatever follows it.
 *
 * A script is exactly four ASCII letters and is stored with only its first letter capitalized.
 * When the first component is not a script, the whole `input` is parsed as a country instead.
 *
 * \return the result of the parsing step that handles the remainder, or true when the script is
 * the last component.
 */
static bool parse_from_script(Info &info, const std::string_view input)
{
  const size_t sep_pos = input.find_first_of("-_@.");
  std::string script(input.substr(0, sep_pos));

  /* Lower-case the candidate while verifying that it consists of four letters. */
  bool is_script = (script.length() == 4);
  for (size_t i = 0; is_script && i < script.size(); i++) {
    char &c = script[i];
    is_script = is_lower_ascii(c) || make_lower_ascii(c);
  }
  if (!is_script) {
    return parse_from_country(info, input);
  }

  /* Capitalize first letter only. */
  make_upper_ascii(script[0]);
  info.script = script;

  if (sep_pos == std::string_view::npos) {
    return true;
  }

  const std::string_view rest = input.substr(sep_pos + 1);
  switch (input[sep_pos]) {
    case '-':
    case '_':
      return parse_from_country(info, rest);
    case '.':
      return parse_from_encoding(info, rest);
    default:
      BLI_assert(input[sep_pos] == '@');
      return parse_from_variant(info, rest);
  }
}
/**
 * Entry point of locale name parsing: read the language and hand the remainder to the parsing
 * step that matches the separator which follows it.
 *
 * The language has to consist of ASCII letters and is stored in lower case. The names `c` and
 * `posix` (in any case) leave the language that `info` already has.
 *
 * \return false when the language is empty or contains a non-letter, or when a later part fails
 * to parse.
 */
static bool parse_from_lang(Info &info, const std::string_view input)
{
  const size_t sep_pos = input.find_first_of("-_@.");
  std::string language(input.substr(0, sep_pos));
  if (language.empty()) {
    return false;
  }

  for (size_t i = 0; i < language.size(); i++) {
    char &c = language[i];
    const bool is_letter = is_lower_ascii(c) || make_lower_ascii(c);
    if (!is_letter) {
      return false;
    }
  }

  /* Keep default if C or POSIX. */
  const bool keep_default = (language == "c" || language == "posix");
  if (!keep_default) {
    info.language = language;
  }

  if (sep_pos == std::string_view::npos) {
    return true;
  }

  const std::string_view rest = input.substr(sep_pos + 1);
  switch (input[sep_pos]) {
    case '-':
    case '_':
      return parse_from_script(info, rest);
    case '.':
      return parse_from_encoding(info, rest);
    default:
      BLI_assert(input[sep_pos] == '@');
      return parse_from_variant(info, rest);
  }
}
/* Info about a locale. */
/**
 * Build locale information from a locale name such as `pt_BR.UTF-8@variant`.
 *
 * When `locale_full_name` is empty, the name is taken from the first source that provides a
 * non-empty one: the macOS user locale (only in builds that enable it), the `LC_ALL` environment
 * variable, the `LANG` environment variable and, on Windows, the user default locale.
 */
Info::Info(const StringRef locale_full_name)
{
  std::string locale_name(locale_full_name);

  /* Use an environment variable as locale name, but only while no name has been found yet. */
  const auto name_from_env = [&locale_name](const char *variable) {
    if (!locale_name.empty()) {
      return;
    }
    if (const char *value = BLI_getenv(variable)) {
      locale_name = value;
    }
  };

#if defined(__APPLE__) && !defined(WITH_HEADLESS) && !defined(WITH_GHOST_SDL)
  if (locale_name.empty()) {
    locale_name = macos_user_locale();
  }
#endif

  name_from_env("LC_ALL");
  name_from_env("LANG");

#ifdef _WIN32
  if (locale_name.empty()) {
    /* Compose `language_COUNTRY` from the ISO names of the user default locale. */
    char buf[128] = {};
    const bool has_language = GetLocaleInfoA(
                                  LOCALE_USER_DEFAULT, LOCALE_SISO639LANGNAME, buf, sizeof(buf)) !=
                              0;
    if (has_language) {
      locale_name = buf;
      const bool has_country = GetLocaleInfoA(LOCALE_USER_DEFAULT,
                                              LOCALE_SISO3166CTRYNAME,
                                              buf,
                                              sizeof(buf)) != 0;
      if (has_country) {
        locale_name += "_";
        locale_name += buf;
      }
    }
  }
#endif

  /* The result is not checked: a name that fails to parse keeps the parts read before the
   * failure. */
  parse_from_lang(*this, locale_name);
}
/**
 * Compose the locale name as `language[_script][_country][@variant]`, leaving out every optional
 * part that is empty.
 */
std::string Info::to_full_name() const
{
  std::string full_name = language;

  /* Append `separator` followed by `part`, skipping empty parts entirely. */
  const auto append_part = [&full_name](const char separator, const std::string &part) {
    if (part.empty()) {
      return;
    }
    full_name += separator;
    full_name += part;
  };

  append_part('_', script);
  append_part('_', country);
  append_part('@', variant);
  return full_name;
}
/* .mo file reader. */
/**
 * In-memory reader for `.mo` message catalogs.
 *
 * The constructor loads the whole file and parses its header. Failures are not thrown, they are
 * reported through #error(). A file that can not be opened is not an error, it results in an
 * empty catalog.
 *
 * All offsets read from the file are bounds checked, so that a truncated or corrupt file results
 * in an error instead of reading outside of the buffer.
 */
class MOFile {
  /* Byte offset of the table describing the original strings. */
  uint32_t keys_offset_ = 0;
  /* Byte offset of the table describing the translated strings. */
  uint32_t translations_offset_ = 0;
  /* Complete contents of the file. */
  Vector<char> data_;
  /* True when the file stores its numbers in the byte order of this machine. */
  bool native_byteorder_ = false;
  /* Number of strings in the catalog, as stored in the header. */
  size_t size_ = 0;
  /* Description of the most recent failure, empty when there was none. */
  std::string error_;

 public:
  MOFile(const std::string &filepath)
  {
    FILE *file = BLI_fopen(filepath.c_str(), "rb");
    if (!file) {
      return;
    }

    fseek(file, 0, SEEK_END);
    const int64_t len = BLI_ftell(file);
    if (len >= 0) {
      fseek(file, 0, SEEK_SET);
      data_.resize(len);
      const size_t expected_bytes = size_t(len);
      if (fread(data_.data(), 1, expected_bytes, file) != expected_bytes) {
        data_.clear();
        error_ = "Failed to read file";
      }
    }
    else {
      error_ = "Wrong file object";
    }
    fclose(file);

    if (error_.empty()) {
      read_data();
    }
  }

  /**
   * Original string (including an optional context prefix) of entry `id`.
   *
   * \return a null terminated string inside the file buffer, or an empty string with #error()
   * set when the entry does not describe a terminated string inside the file.
   */
  const char *key(int id)
  {
    uint32_t off = 0;
    if (!read_u32(table_entry(keys_offset_, id) + 4, off)) {
      return "";
    }
    /* Callers treat the result as C string, so a terminator has to exist within the buffer. */
    const int64_t size = int64_t(data_.size());
    if (off >= size || memchr(data_.data() + off, '\0', size_t(size - off)) == nullptr) {
      error_ = "Bad mo-file format";
      return "";
    }
    return data_.data() + off;
  }

  /**
   * Translated string of entry `id`.
   *
   * \return a reference into the file buffer, or an empty string with #error() set when the
   * entry does not lie within the file.
   */
  StringRef value(int id)
  {
    const int64_t entry = table_entry(translations_offset_, id);
    uint32_t len = 0;
    uint32_t off = 0;
    if (!read_u32(entry, len) || !read_u32(entry + 4, off)) {
      return "";
    }
    const int64_t size = int64_t(data_.size());
    if (len > size || off > size - len) {
      error_ = "Bad mo-file format";
      return "";
    }
    /* Pointer arithmetic rather than indexing, `off` may equal the size for an empty string. */
    return StringRef(data_.data() + off, len);
  }

  size_t size() const
  {
    return size_;
  }

  bool empty() const
  {
    return size_ == 0;
  }

  const std::string &error() const
  {
    return error_;
  }

 private:
  /** Detect the byte order from the magic number and read the header fields. */
  void read_data()
  {
    if (data_.size() < 4) {
      error_ = "Invalid 'mo' file format - the file is too short";
      return;
    }

    uint32_t magic;
    memcpy(&magic, data_.data(), sizeof(magic));
    if (magic == 0x950412de) {
      native_byteorder_ = true;
    }
    else if (magic == 0xde120495) {
      /* Same magic number with swapped bytes, every number has to be converted. */
      native_byteorder_ = false;
    }
    else {
      error_ = "Invalid file format - invalid magic number";
      return;
    }

    /* Read all format sizes. Nothing is stored unless the complete header is readable. */
    uint32_t count = 0;
    uint32_t keys_offset = 0;
    uint32_t translations_offset = 0;
    if (!read_u32(8, count) || !read_u32(12, keys_offset) || !read_u32(16, translations_offset)) {
      return;
    }
    size_ = count;
    keys_offset_ = keys_offset;
    translations_offset_ = translations_offset;
  }

  /**
   * Byte offset of entry `id` in the table starting at `table_offset`. Every entry consists of
   * two 32-bit numbers: the length of the string followed by its offset in the file.
   *
   * Computed with 64 bits so that values from a corrupt file can not wrap around into the valid
   * range.
   */
  static int64_t table_entry(const uint32_t table_offset, const int id)
  {
    return int64_t(table_offset) + int64_t(id) * 8;
  }

  /**
   * Read the 32-bit number at byte `offset`, converted to the byte order of this machine.
   *
   * \return false with #error() set (and `r_value` zeroed) when the number is not entirely
   * inside the file.
   */
  bool read_u32(const int64_t offset, uint32_t &r_value)
  {
    const int64_t size = int64_t(data_.size());
    if (offset < 0 || size < 4 || offset > size - 4) {
      error_ = "Bad mo-file format";
      r_value = 0;
      return false;
    }

    uint32_t v;
    memcpy(&v, data_.data() + offset, sizeof(v));
    if (!native_byteorder_) {
      v = ((v & 0xFF) << 24) | ((v & 0xFF00) << 8) | ((v & 0xFF0000) >> 8) |
          ((v & 0xFF000000) >> 24);
    }
    r_value = v;
    return true;
  }
};
/* Message lookup key. */
/**
 * Lookup key made of a message context and the message itself. Both are held as #StringRef, so
 * the key refers to the strings of the caller rather than storing copies of them.
 */
struct MessageKeyRef {
  StringRef context_;
  StringRef str_;

  /** Hash over the context and the message together. */
  uint64_t hash() const
  {
    const uint64_t combined_hash = get_default_hash(context_, str_);
    return combined_hash;
  }
};
/**
 * Key that stores its own copies of the message context and the message.
 */
struct MessageKey {
  std::string context_;
  std::string str_;

  /**
   * Split `c` at the first byte with value 4: the text before it becomes the context and the
   * text after it the message. Without such a byte the context stays empty and all of `c` is the
   * message.
   */
  MessageKey(const StringRef c)
  {
    const size_t separator_pos = c.find(char(4));
    if (separator_pos != StringRef::not_found) {
      context_ = c.substr(0, separator_pos);
      str_ = c.substr(separator_pos + 1);
      return;
    }
    str_ = c;
  }

  /** Hash over the context and the message together. */
  uint64_t hash() const
  {
    const uint64_t combined_hash = get_default_hash(context_, str_);
    return combined_hash;
  }

  /** Hash of a #MessageKeyRef, which simply forwards to the hash function of that key. */
  static uint64_t hash_as(const MessageKeyRef &key)
  {
    const uint64_t ref_hash = key.hash();
    return ref_hash;
  }
};
/** Two keys are equal when both their context and their message are equal. */
inline bool operator==(const MessageKey &a, const MessageKey &b)
{
  if (!(a.context_ == b.context_)) {
    return false;
  }
  return a.str_ == b.str_;
}
/** Compare a referencing key with an owning key: both context and message have to be equal. */
inline bool operator==(const MessageKeyRef &a, const MessageKey &b)
{
  if (!(a.context_ == b.context_)) {
    return false;
  }
  return a.str_ == b.str_;
}
/* std::locale facet for translation based on .mo files. */
class MOMessageFacet : public MessageFacet {
using Catalog = Map<MessageKey, std::string>;
Vector<Catalog> catalogs_;
std::string error_;
public:
MOMessageFacet(const Info &info,
const Vector<std::string> &domains,
const Vector<std::string> &paths)
{
const Vector<std::string> catalog_paths = get_catalog_paths(info, paths);
for (size_t i = 0; i < domains.size(); i++) {
const std::string &domain_name = domains[i];
const std::string filename = domain_name + ".mo";
Catalog catalog;
for (const std::string &path : catalog_paths) {
if (load_file(path + "/" + filename, catalog)) {
break;
}
}
catalogs_.append(std::move(catalog));
}
}
const char *translate(const int domain,
const StringRef context,
const StringRef str) const override
{
if (domain < 0 || domain >= catalogs_.size()) {
return nullptr;
}
const MessageKeyRef key{context, str};
const std::string *result = catalogs_[domain].lookup_ptr_as(key);
return (result) ? result->c_str() : nullptr;
}
const std::string &error()
{
return error_;
}
private:
Vector<std::string> get_catalog_paths(const Info &info, const Vector<std::string> &paths)
{
/* Find language folders. */
Vector<std::string> lang_folders;
if (!info.language.empty()) {
if (!info.variant.empty() && !info.country.empty()) {
lang_folders.append(info.language + "_" + info.country + "@" + info.variant);
}
if (!info.variant.empty()) {
lang_folders.append(info.language + "@" + info.variant);
}
if (!info.country.empty()) {
lang_folders.append(info.language + "_" + info.country);
}
lang_folders.append(info.language);
}
/* Find catalogs in language folders. */
Vector<std::string> result;
result.reserve(lang_folders.size() * paths.size());
for (const std::string &lang_folder : lang_folders) {
for (const std::string &search_path : paths) {
result.append(search_path + "/" + lang_folder + "/LC_MESSAGES");
}
}
return result;
}
bool load_file(const std::string &filepath, Catalog &catalog)
{
MOFile mo(filepath);
if (!mo.error().empty()) {
error_ = mo.error();
return false;
}
if (mo.empty()) {
return false;
}
/* Only support UTF-8 encoded files, as created by our msgfmt tool. */
const std::string mo_encoding = extract(mo.value(0), "charset=", " \r\n;");
if (mo_encoding.empty()) {
error_ = "Invalid mo-format, encoding is not specified";
return false;
}
if (mo_encoding != "UTF-8") {
error_ = "supported mo-format, encoding must be UTF-8";
return false;
}
/* Create context + key to translated string mapping. */
for (size_t i = 0; i < mo.size(); i++) {
const MessageKey key(mo.key(i));
catalog.add(std::move(key), std::string(mo.value(i)));
}
return true;
}
static std::string extract(StringRef meta, const std::string &key, const StringRef separators)
{
const size_t pos = meta.find(key);
if (pos == StringRef::not_found) {
return "";
}
meta = meta.substr(pos + key.size());
const size_t end_pos = meta.find_first_of(separators);
return std::string(meta.substr(0, end_pos));
}
};
/* Install facet into std::locale. */
std::locale::id MessageFacet::id;
std::locale MessageFacet::install(const std::locale &locale,
const Info &info,
const Vector<std::string> &domains,
const Vector<std::string> &paths)
{
MOMessageFacet *facet = new MOMessageFacet(info, domains, paths);
if (!facet->error().empty()) {
throw std::runtime_error(facet->error());
return locale;
}
return std::locale(locale, facet);
}
} // namespace blender::locale