Refactor: Don't modify std::locale::global for translation

Without boost::locale, there seems to be no particular reason to modify the
global locale; we can just translate strings ourselves.

Other locale facets like numeric and time are also left unchanged
from the default, which is the "C" locale. This appears to match
previous Blender behavior with boost::locale. That protects against
bugs where formatting floats for I/O unexpectedly depends on the
system language.

Pull Request: https://projects.blender.org/blender/blender/pulls/133347
This commit is contained in:
Brecht Van Lommel
2025-01-21 08:07:44 +01:00
parent 820f261371
commit 860bfd786e
6 changed files with 246 additions and 285 deletions

View File

@@ -6,8 +6,6 @@
* \ingroup intern_locale
*/
#include <iostream>
#include "blender_locale.h"
#include "messages.h"
@@ -15,71 +13,27 @@ static std::string messages_path;
static std::string default_domain;
static std::string locale_str;
/* NOTE: We cannot use short stuff like `boost::locale::gettext`, because those return
* `std::basic_string` objects, whose c_str()-returned char* is no longer valid
* once they are destroyed (which happens as soon as they go out of scope of this function). */
static std::locale locale_global;
static blender::locale::MessageFacet const *facet_global = nullptr;
/* Refresh the cached message facet from the current global `std::locale`.
 *
 * Caching the facet in a global is better for performance, and it also fixes
 * crashes on macOS when translating from threads other than main (likely
 * because of some internal thread local variables).
 *
 * On failure `facet_global` is reset to null. */
static void bl_locale_global_cache()
{
  try {
    /* The facet reference is only valid while `locale_global` exists,
     * which is why the locale is stored alongside it. */
    locale_global = std::locale();
    facet_global = &std::use_facet<blender::locale::MessageFacet>(locale_global);
  }
  /* TODO: verify the facet is not installed for the LC_ALL = "C" case, i.e. when
   * `std::has_facet<blender::locale::MessageFacet>(l) == false`.
   *
   * `std::bad_cast` derives from `std::exception`, so this single handler covers
   * both the missing-facet case and any other standard exception. */
  catch (const std::exception &error) {
#ifndef NDEBUG
    std::cout << "bl_locale_global_cache:" << error.what() << " \n";
#endif
    (void)error;
    facet_global = nullptr;
  }
}
/* Remember the search path for message catalogs and the default domain.
 * Nothing is loaded here; the stored values are passed on by `bl_locale_set`. */
void bl_locale_init(const char *_messages_path, const char *_default_domain)
{
  /* TODO: Does the locale need to be modified for other things like numeric or time?
   * And if so, should it be set to "C", or to the chosen language? */
  default_domain.assign(_default_domain);
  messages_path.assign(_messages_path);
}
/* Release the translation state; simply forwards to `blender::locale::free()`. */
void bl_locale_free()
{
blender::locale::free();
}
/* Switch translations to `locale_name` and record the resulting full locale
 * name in `locale_str`.
 *
 * A null `locale_name` is treated like an empty string, in which case the
 * locale name is obtained from the system. */
void bl_locale_set(const char *locale_name)
{
/* Get locale name from system if not specified. */
std::string locale_full_name = locale_name ? locale_name : "";
try {
/* Retrieve and parse full locale name. */
blender::locale::Info info(locale_full_name);
/* Initialize and load .mo file for locale. */
blender::locale::init(locale_full_name, {default_domain}, {messages_path});
/* Load .mo file for locale. */
std::locale _locale = blender::locale::MessageFacet::install(
std::locale(), info, {default_domain}, {messages_path});
std::locale::global(_locale);
bl_locale_global_cache();
/* Generate the locale string, to know which one is used in case of default locale. */
locale_str = info.to_full_name();
}
catch (std::exception const &e) {
/* Report the failure; no exception leaves this function. */
std::cout << "bl_locale_set(" << locale_full_name << "): " << e.what() << " \n";
}
/* Generate the locale string, to know which one is used in case of default locale. */
locale_str = blender::locale::full_name();
}
const char *bl_locale_get(void)
@@ -89,12 +43,6 @@ const char *bl_locale_get(void)
/* Translate `msgid` within the message context `msgctxt`, using domain index 0.
 * Returns `msgid` itself when no translation is available. */
const char *bl_locale_pgettext(const char *msgctxt, const char *msgid)
{
if (facet_global) {
char const *r = facet_global->translate(0, msgctxt, msgid);
if (r) {
return r;
}
}
return msgid;
/* A null result means "no translation", so fall back to the untranslated string. */
const char *r = blender::locale::translate(0, msgctxt, msgid);
return (r) ? r : msgid;
}

View File

@@ -16,6 +16,7 @@ extern "C" {
void bl_locale_init(const char *messages_path, const char *default_domain);
void bl_locale_set(const char *locale);
void bl_locale_free(void);
const char *bl_locale_get(void);
const char *bl_locale_pgettext(const char *msgctxt, const char *msgid);

View File

@@ -10,7 +10,7 @@
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <memory>
#include <string>
#include <string_view>
@@ -63,194 +63,202 @@ static constexpr bool is_numeric_ascii(const char c)
return '0' <= c && c <= '9';
}
/* Locale parsing. */
/* Store the variant part of a locale name (the text following '@') in
 * `info.variant`, converted to lowercase.
 * Returns false, leaving `info` untouched, for the "C" language or an empty variant. */
static bool parse_from_variant(Info &info, const std::string_view input)
{
  const bool has_variant = !(info.language == "C") && !input.empty();
  if (!has_variant) {
    return false;
  }
  /* No assumptions about the format, just keep it in lowercase. */
  info.variant.assign(input.data(), input.size());
  std::for_each(info.variant.begin(), info.variant.end(), [](char &ch) { make_lower_ascii(ch); });
  return true;
}
/* Consume the encoding part of a locale name (the text following '.').
 * The encoding is not stored, it only has to be non-empty. An optional
 * '@variant' suffix is handed over to `parse_from_variant`. */
static bool parse_from_encoding(Info &info, const std::string_view input)
{
  const std::string_view::size_type at_pos = input.find_first_of('@');
  const std::string_view encoding = input.substr(0, at_pos);
  if (encoding.empty()) {
    return false;
  }
  /* The encoding itself is ignored; without an '@' there is nothing left to parse. */
  if (at_pos >= input.size()) {
    return true;
  }
  BLI_assert(input[at_pos] == '@');
  return parse_from_variant(info, input.substr(at_pos + 1));
}
/* Parse the country part of a locale name into `info.country`, then continue
 * with the encoding ('.') or variant ('@') that follows it, if any.
 * Returns false when the language is "C", the country is empty, or it is
 * neither all uppercase ASCII letters, a 3 digit code, nor the `US_POSIX` alias. */
static bool parse_from_country(Info &info, const std::string_view input)
{
if (info.language == "C") {
return false;
}
/* The country ends at the first '@' or '.', or at the end of the input. */
const auto end = input.find_first_of("@.");
std::string tmp(input.substr(0, end));
if (tmp.empty()) {
return false;
}
for (char &c : tmp) {
make_upper_ascii(c);
}
/* If it's ALL uppercase ASCII, assume ISO 3166 country id. */
if (std::find_if_not(tmp.begin(), tmp.end(), is_upper_ascii) != tmp.end()) {
/* else handle special cases:
* - en_US_POSIX is an alias for C
* - M49 country code: 3 digits */
if (info.language == "en" && tmp == "US_POSIX") {
info.language = "C";
tmp.clear();
}
else if (tmp.size() != 3u ||
std::find_if_not(tmp.begin(), tmp.end(), is_numeric_ascii) != tmp.end())
{
return false;
}
}
info.country = tmp;
/* `end` is `npos` when no separator was found, which also satisfies this test. */
if (end >= input.size()) {
return true;
}
if (input[end] == '.') {
return parse_from_encoding(info, input.substr(end + 1));
}
BLI_assert(input[end] == '@');
return parse_from_variant(info, input.substr(end + 1));
}
/* Parse an optional script part of a locale name into `info.script` and continue
 * with whatever follows it. When the next component is not a 4 letter script,
 * the same input is parsed as a country instead. */
static bool parse_from_script(Info &info, const std::string_view input)
{
const auto end = input.find_first_of("-_@.");
std::string tmp(input.substr(0, end));
/* Script is exactly 4 ASCII characters, otherwise it is not present. */
if (tmp.length() != 4) {
return parse_from_country(info, input);
}
for (char &c : tmp) {
/* NOTE(review): presumably `make_lower_ascii` returns false when `c` is not an
* uppercase ASCII letter, so any non-letter means "not a script" -- confirm. */
if (!is_lower_ascii(c) && !make_lower_ascii(c)) {
return parse_from_country(info, input);
}
}
make_upper_ascii(tmp[0]); /* Capitalize first letter only. */
info.script = tmp;
/* `end` is `npos` when no separator was found, which also satisfies this test. */
if (end >= input.size()) {
return true;
}
if (input[end] == '-' || input[end] == '_') {
return parse_from_country(info, input.substr(end + 1));
}
if (input[end] == '.') {
return parse_from_encoding(info, input.substr(end + 1));
}
BLI_assert(input[end] == '@');
return parse_from_variant(info, input.substr(end + 1));
}
/* Entry point for parsing a full locale name: reads the language part into
 * `info.language` and dispatches on the separator that follows it
 * ('-' or '_': script/country, '.': encoding, '@': variant).
 * Returns false when the language is empty or contains a character that is
 * rejected by the letter check below. */
static bool parse_from_lang(Info &info, const std::string_view input)
{
const auto end = input.find_first_of("-_@.");
std::string tmp(input.substr(0, end));
if (tmp.empty()) {
return false;
}
for (char &c : tmp) {
/* NOTE(review): presumably `make_lower_ascii` returns false when `c` is not an
* uppercase ASCII letter, making this a letters-only check -- confirm. */
if (!is_lower_ascii(c) && !make_lower_ascii(c)) {
return false;
}
}
if (tmp != "c" && tmp != "posix") { /* Keep default if C or POSIX. */
info.language = tmp;
}
/* `end` is `npos` when no separator was found, which also satisfies this test. */
if (end >= input.size()) {
return true;
}
if (input[end] == '-' || input[end] == '_') {
return parse_from_script(info, input.substr(end + 1));
}
if (input[end] == '.') {
return parse_from_encoding(info, input.substr(end + 1));
}
BLI_assert(input[end] == '@');
return parse_from_variant(info, input.substr(end + 1));
}
/* Info about a locale. */
Info::Info(const StringRef locale_full_name)
{
std::string locale_name(locale_full_name);
class Info {
public:
std::string language = "C";
std::string script;
std::string country;
std::string variant;
/* If locale name not specified, try to get the appropriate one from the system. */
Info(const StringRef locale_full_name)
{
std::string locale_name(locale_full_name);
/* If locale name not specified, try to get the appropriate one from the system. */
#if defined(__APPLE__) && !defined(WITH_HEADLESS) && !defined(WITH_GHOST_SDL)
if (locale_name.empty()) {
locale_name = macos_user_locale();
}
if (locale_name.empty()) {
locale_name = macos_user_locale();
}
#endif
if (locale_name.empty()) {
const char *lc_all = BLI_getenv("LC_ALL");
if (lc_all) {
locale_name = lc_all;
}
}
if (locale_name.empty()) {
const char *lang = BLI_getenv("LANG");
if (lang) {
locale_name = lang;
}
}
#ifdef _WIN32
if (locale_name.empty()) {
char buf[128] = {};
if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO639LANGNAME, buf, sizeof(buf)) != 0) {
locale_name = buf;
if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO3166CTRYNAME, buf, sizeof(buf)) != 0) {
locale_name += "_";
locale_name += buf;
if (locale_name.empty()) {
const char *lc_all = BLI_getenv("LC_ALL");
if (lc_all) {
locale_name = lc_all;
}
}
if (locale_name.empty()) {
const char *lang = BLI_getenv("LANG");
if (lang) {
locale_name = lang;
}
}
#ifdef _WIN32
if (locale_name.empty()) {
char buf[128] = {};
if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO639LANGNAME, buf, sizeof(buf)) != 0) {
locale_name = buf;
if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO3166CTRYNAME, buf, sizeof(buf)) != 0) {
locale_name += "_";
locale_name += buf;
}
}
}
}
#endif
parse_from_lang(*this, locale_name);
}
parse_from_lang(locale_name);
}
std::string Info::to_full_name() const
{
std::string result = language;
if (!script.empty()) {
result += '_' + script;
/* Compose the locale name as `language[_script][_country][@variant]`,
 * leaving out every part that is empty. */
std::string to_full_name() const
{
  std::string full_name(language);
  if (!script.empty()) {
    full_name += '_';
    full_name += script;
  }
  if (!country.empty()) {
    full_name += '_';
    full_name += country;
  }
  if (!variant.empty()) {
    full_name += '@';
    full_name += variant;
  }
  return full_name;
}
if (!country.empty()) {
result += '_' + country;
private:
/* Locale parsing. */
/* Store the variant part of a locale name (the text following '@') in
 * `variant`, converted to lowercase.
 * Returns false, changing nothing, for the "C" language or an empty variant. */
bool parse_from_variant(const std::string_view input)
{
  const bool has_variant = !(language == "C") && !input.empty();
  if (!has_variant) {
    return false;
  }
  /* No assumptions about the format, just keep it in lowercase. */
  variant.assign(input.data(), input.size());
  std::for_each(variant.begin(), variant.end(), [](char &ch) { make_lower_ascii(ch); });
  return true;
}
if (!variant.empty()) {
result += '@' + variant;
/* Consume the encoding part of a locale name (the text following '.').
 * The encoding is not stored, it only has to be non-empty. An optional
 * '@variant' suffix is handed over to `parse_from_variant`.
 *
 * Returns false when the encoding is empty or the variant fails to parse. */
bool parse_from_encoding(const std::string_view input)
{
  /* Keep the unsigned type returned by `find_first_of`, so that "not found"
   * stays `npos` instead of relying on signed/unsigned conversions. */
  const size_t end = input.find_first_of('@');
  const std::string_view encoding = input.substr(0, end);
  if (encoding.empty()) {
    return false;
  }
  /* The encoding itself is ignored; without an '@' there is nothing left to parse. */
  if (end == std::string_view::npos) {
    return true;
  }
  BLI_assert(input[end] == '@');
  return parse_from_variant(input.substr(end + 1));
}
return result;
}
/* Parse the country part of a locale name into `country`, then continue with
 * the encoding ('.') or variant ('@') that follows it, if any.
 *
 * Returns false when the language is "C", the country is empty, or it is
 * neither all uppercase ASCII letters, a 3 digit code, nor the `US_POSIX` alias. */
bool parse_from_country(const std::string_view input)
{
  if (language == "C") {
    return false;
  }
  /* Keep the unsigned type returned by `find_first_of`, so that "not found"
   * stays `npos` instead of relying on signed/unsigned conversions. */
  const size_t end = input.find_first_of("@.");
  std::string tmp(input.substr(0, end));
  if (tmp.empty()) {
    return false;
  }
  for (char &c : tmp) {
    make_upper_ascii(c);
  }
  /* If it's ALL uppercase ASCII, assume ISO 3166 country id. */
  if (std::find_if_not(tmp.begin(), tmp.end(), is_upper_ascii) != tmp.end()) {
    /* else handle special cases:
     * - en_US_POSIX is an alias for C
     * - M49 country code: 3 digits */
    if (language == "en" && tmp == "US_POSIX") {
      language = "C";
      tmp.clear();
    }
    else if (tmp.size() != 3u ||
             std::find_if_not(tmp.begin(), tmp.end(), is_numeric_ascii) != tmp.end())
    {
      return false;
    }
  }
  country = tmp;
  if (end == std::string_view::npos) {
    return true;
  }
  if (input[end] == '.') {
    return parse_from_encoding(input.substr(end + 1));
  }
  BLI_assert(input[end] == '@');
  return parse_from_variant(input.substr(end + 1));
}
/* Parse an optional script part of a locale name into `script` and continue
 * with whatever follows it. When the next component is not a 4 letter script,
 * the same input is parsed as a country instead.
 *
 * Returns the result of the parser the remaining input was handed to, or true
 * when the script is the last component. */
bool parse_from_script(const std::string_view input)
{
  /* Keep the unsigned type returned by `find_first_of`, so that "not found"
   * stays `npos` instead of relying on signed/unsigned conversions. */
  const size_t end = input.find_first_of("-_@.");
  std::string tmp(input.substr(0, end));
  /* Script is exactly 4 ASCII characters, otherwise it is not present. */
  if (tmp.length() != 4) {
    return parse_from_country(input);
  }
  for (char &c : tmp) {
    /* NOTE(review): presumably `make_lower_ascii` returns false when `c` is not an
     * uppercase ASCII letter, so any non-letter means "not a script" -- confirm. */
    if (!is_lower_ascii(c) && !make_lower_ascii(c)) {
      return parse_from_country(input);
    }
  }
  make_upper_ascii(tmp[0]); /* Capitalize first letter only. */
  script = tmp;
  if (end == std::string_view::npos) {
    return true;
  }
  if (input[end] == '-' || input[end] == '_') {
    return parse_from_country(input.substr(end + 1));
  }
  if (input[end] == '.') {
    return parse_from_encoding(input.substr(end + 1));
  }
  BLI_assert(input[end] == '@');
  return parse_from_variant(input.substr(end + 1));
}
/* Entry point for parsing a full locale name: reads the language part into
 * `language` and dispatches on the separator that follows it
 * ('-' or '_': script/country, '.': encoding, '@': variant).
 *
 * Returns false when the language is empty or contains a character that is
 * rejected by the letter check below. */
bool parse_from_lang(const std::string_view input)
{
  /* Keep the unsigned type returned by `find_first_of`, so that "not found"
   * stays `npos` instead of relying on signed/unsigned conversions. */
  const size_t end = input.find_first_of("-_@.");
  std::string tmp(input.substr(0, end));
  if (tmp.empty()) {
    return false;
  }
  for (char &c : tmp) {
    /* NOTE(review): presumably `make_lower_ascii` returns false when `c` is not an
     * uppercase ASCII letter, making this a letters-only check -- confirm. */
    if (!is_lower_ascii(c) && !make_lower_ascii(c)) {
      return false;
    }
  }
  if (tmp != "c" && tmp != "posix") { /* Keep default if C or POSIX. */
    language = tmp;
  }
  if (end == std::string_view::npos) {
    return true;
  }
  if (input[end] == '-' || input[end] == '_') {
    return parse_from_script(input.substr(end + 1));
  }
  if (input[end] == '.') {
    return parse_from_encoding(input.substr(end + 1));
  }
  BLI_assert(input[end] == '@');
  return parse_from_variant(input.substr(end + 1));
}
};
/* .mo file reader. */
@@ -418,17 +426,17 @@ inline bool operator==(const MessageKeyRef &a, const MessageKey &b)
return a.context_ == b.context_ && a.str_ == b.str_;
}
/* std::locale facet for translation based on .mo files. */
/* Messages translation based on .mo files. */
class MOMessageFacet : public MessageFacet {
class MOMessages {
using Catalog = Map<MessageKey, std::string>;
Vector<Catalog> catalogs_;
std::string error_;
public:
MOMessageFacet(const Info &info,
const Vector<std::string> &domains,
const Vector<std::string> &paths)
MOMessages(const Info &info,
const Vector<std::string> &domains,
const Vector<std::string> &paths)
{
const Vector<std::string> catalog_paths = get_catalog_paths(info, paths);
for (size_t i = 0; i < domains.size(); i++) {
@@ -444,9 +452,7 @@ class MOMessageFacet : public MessageFacet {
}
}
const char *translate(const int domain,
const StringRef context,
const StringRef str) const override
const char *translate(const int domain, const StringRef context, const StringRef str) const
{
if (domain < 0 || domain >= catalogs_.size()) {
return nullptr;
@@ -533,22 +539,47 @@ class MOMessageFacet : public MessageFacet {
}
};
/* Install facet into std::locale. */
/* Public API */
std::locale::id MessageFacet::id;
static std::unique_ptr<MOMessages> global_messages;
static std::string global_full_name;
std::locale MessageFacet::install(const std::locale &locale,
const Info &info,
const Vector<std::string> &domains,
const Vector<std::string> &paths)
void init(const StringRef locale_full_name,
const Vector<std::string> &domains,
const Vector<std::string> &paths)
{
MOMessageFacet *facet = new MOMessageFacet(info, domains, paths);
if (!facet->error().empty()) {
throw std::runtime_error(facet->error());
return locale;
Info info(locale_full_name);
if (global_full_name == info.to_full_name()) {
return;
}
return std::locale(locale, facet);
global_messages = std::make_unique<MOMessages>(info, domains, paths);
global_full_name = info.to_full_name();
if (!global_messages->error().empty()) {
printf("bl_locale_set(%s): %s\n", global_full_name.c_str(), global_messages->error().c_str());
free();
}
}
/* Drop the loaded message catalogs and forget the active locale name. */
void free()
{
  global_messages = nullptr;
  global_full_name.clear();
}
/* Look up the translation of `key` within `context` in the catalog of `domain`.
 * Returns null when no catalogs are loaded; otherwise the result of the lookup
 * in the loaded catalogs. */
const char *translate(const int domain, const StringRef context, const StringRef key)
{
  return global_messages ? global_messages->translate(domain, context, key) : nullptr;
}
/* Full name of the currently active locale, empty when nothing is loaded.
 * The returned pointer refers to the storage of `global_full_name`, so copy the
 * string if it has to outlive the next modification of that variable. */
const char *full_name()
{
return global_full_name.c_str();
}
} // namespace blender::locale

View File

@@ -3,7 +3,6 @@
*
* Adapted from boost::locale */
#include <locale>
#include <string>
#include "BLI_string_ref.hh"
@@ -11,34 +10,16 @@
namespace blender::locale {
/* Info about a locale. */
struct Info {
Info(const StringRef locale_full_name);
/* Set up translation for the given locale. */
void init(const StringRef locale_full_name, /* Locale name. */
const Vector<std::string> &domains, /* Application names. */
const Vector<std::string> &paths); /* Search paths for .mo files. */
void free();
std::string language = "C";
std::string script;
std::string country;
std::string variant;
std::string to_full_name() const;
const char *translate(const int domain, const StringRef context, const StringRef key);
const char *full_name();
#if defined(__APPLE__) && !defined(WITH_HEADLESS) && !defined(WITH_GHOST_SDL)
static std::string macos_user_locale();
std::string macos_user_locale();
#endif
};
/* Message facet to install into std::locale for translation. */
class MessageFacet : public std::locale::facet {
public:
static std::locale::id id;
static std::locale install(const std::locale &locale,
const Info &info,
const Vector<std::string> &domains, /* Application names. */
const Vector<std::string> &paths); /* Search paths for .mo files. */
virtual const char *translate(const int domain,
const StringRef context,
const StringRef key) const = 0;
};
} // namespace blender::locale

View File

@@ -17,7 +17,7 @@ namespace blender::locale {
#if !defined(WITH_HEADLESS) && !defined(WITH_GHOST_SDL)
/* Get current locale. */
std::string Info::macos_user_locale()
std::string macos_user_locale()
{
std::string result;

View File

@@ -227,8 +227,8 @@ void BLT_lang_init()
/* Free language/translation data.
 * The calls below are only compiled when `WITH_INTERNATIONAL` is defined. */
void BLT_lang_free()
{
#ifdef WITH_INTERNATIONAL
bl_locale_free();
free_locales();
#else
#endif
}