From 860bfd786e3a0c974be9a38ba88070e400ea3873 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Tue, 21 Jan 2025 08:07:44 +0100 Subject: [PATCH] Refactor: Don't modify std::locale::global for translation Without boost::locale, there seems to be no particular reason to modify the global locale; we can just translate strings ourselves. Other locale facets like numeric and time are also left unchanged from the default, which is the "C" locale. This appears to match previous Blender behavior with boost::locale. That protects against bugs where formatting floats for I/O unexpectedly depends on the system language. Pull Request: https://projects.blender.org/blender/blender/pulls/133347 --- intern/locale/blender_locale.cpp | 74 +--- intern/locale/blender_locale.h | 1 + intern/locale/messages.cpp | 419 ++++++++++-------- intern/locale/messages.h | 33 +- intern/locale/messages_apple.mm | 2 +- .../blentranslation/intern/blt_lang.cc | 2 +- 6 files changed, 246 insertions(+), 285 deletions(-) diff --git a/intern/locale/blender_locale.cpp b/intern/locale/blender_locale.cpp index b67bb6f7ad5..4dfc69e4793 100644 --- a/intern/locale/blender_locale.cpp +++ b/intern/locale/blender_locale.cpp @@ -6,8 +6,6 @@ * \ingroup intern_locale */ -#include - #include "blender_locale.h" #include "messages.h" @@ -15,71 +13,27 @@ static std::string messages_path; static std::string default_domain; static std::string locale_str; -/* NOTE: We cannot use short stuff like `boost::locale::gettext`, because those return - * `std::basic_string` objects, which c_ptr()-returned char* is no more valid - * once deleted (which happens as soons they are out of scope of this func). */ -static std::locale locale_global; -static blender::locale::MessageFacet const *facet_global = nullptr; - -static void bl_locale_global_cache() -{ - /* Cache facet in global variable. Not only is it better for performance, - * it also fixes crashes on macOS when doing translation from threads other - * than main. 
Likely because of some internal thread local variables. */ - try { - /* facet_global reference is valid as long as local_global exists, - * so we store both. */ - locale_global = std::locale(); - facet_global = &std::use_facet(locale_global); - } - // TODO: verify it's not installed for C case - /* `if std::has_facet(l) == false`, LC_ALL = "C" case. */ - catch (const std::bad_cast &e) { -#ifndef NDEBUG - std::cout << "bl_locale_global_cache:" << e.what() << " \n"; -#endif - (void)e; - facet_global = nullptr; - } - catch (const std::exception &e) { -#ifndef NDEBUG - std::cout << "bl_locale_global_cache:" << e.what() << " \n"; -#endif - (void)e; - facet_global = nullptr; - } -} - void bl_locale_init(const char *_messages_path, const char *_default_domain) { - /* TODO: Do we need to modify locale for other things like numeric or time? - * And if so, do we need to set it to "C", or to the chosen language? */ messages_path = _messages_path; default_domain = _default_domain; } +void bl_locale_free() +{ + blender::locale::free(); +} + void bl_locale_set(const char *locale_name) { /* Get locale name from system if not specified. */ std::string locale_full_name = locale_name ? locale_name : ""; - try { - /* Retrieve and parse full locale name. */ - blender::locale::Info info(locale_full_name); + /* Initialize and load .mo file for locale. */ + blender::locale::init(locale_full_name, {default_domain}, {messages_path}); - /* Load .mo file for locale. */ - std::locale _locale = blender::locale::MessageFacet::install( - std::locale(), info, {default_domain}, {messages_path}); - std::locale::global(_locale); - - bl_locale_global_cache(); - - /* Generate the locale string, to known which one is used in case of default locale. */ - locale_str = info.to_full_name(); - } - catch (std::exception const &e) { - std::cout << "bl_locale_set(" << locale_full_name << "): " << e.what() << " \n"; - } + /* Generate the locale string, to know which one is used in case of default locale. 
*/ + locale_str = blender::locale::full_name(); } const char *bl_locale_get(void) @@ -89,12 +43,6 @@ const char *bl_locale_get(void) const char *bl_locale_pgettext(const char *msgctxt, const char *msgid) { - if (facet_global) { - char const *r = facet_global->translate(0, msgctxt, msgid); - if (r) { - return r; - } - } - - return msgid; + const char *r = blender::locale::translate(0, msgctxt, msgid); + return (r) ? r : msgid; } diff --git a/intern/locale/blender_locale.h b/intern/locale/blender_locale.h index 162f1eca968..d45b666da2a 100644 --- a/intern/locale/blender_locale.h +++ b/intern/locale/blender_locale.h @@ -16,6 +16,7 @@ extern "C" { void bl_locale_init(const char *messages_path, const char *default_domain); void bl_locale_set(const char *locale); +void bl_locale_free(void); const char *bl_locale_get(void); const char *bl_locale_pgettext(const char *msgctxt, const char *msgid); diff --git a/intern/locale/messages.cpp b/intern/locale/messages.cpp index 202b55a3445..bcbc541faff 100644 --- a/intern/locale/messages.cpp +++ b/intern/locale/messages.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include @@ -63,194 +63,202 @@ static constexpr bool is_numeric_ascii(const char c) return '0' <= c && c <= '9'; } -/* Locale parsing. */ - -static bool parse_from_variant(Info &info, const std::string_view input) -{ - if (info.language == "C" || input.empty()) { - return false; - } - info.variant = input; - /* No assumptions, just make it lowercase. */ - for (char &c : info.variant) { - make_lower_ascii(c); - } - return true; -} - -static bool parse_from_encoding(Info &info, const std::string_view input) -{ - const auto end = input.find_first_of('@'); - std::string tmp(input.substr(0, end)); - if (tmp.empty()) { - return false; - } - /* tmp contains encoding, we ignore it. 
*/ - if (end >= input.size()) { - return true; - } - BLI_assert(input[end] == '@'); - return parse_from_variant(info, input.substr(end + 1)); -} - -static bool parse_from_country(Info &info, const std::string_view input) -{ - if (info.language == "C") { - return false; - } - - const auto end = input.find_first_of("@."); - std::string tmp(input.substr(0, end)); - if (tmp.empty()) { - return false; - } - - for (char &c : tmp) { - make_upper_ascii(c); - } - - /* If it's ALL uppercase ASCII, assume ISO 3166 country id. */ - if (std::find_if_not(tmp.begin(), tmp.end(), is_upper_ascii) != tmp.end()) { - /* else handle special cases: - * - en_US_POSIX is an alias for C - * - M49 country code: 3 digits */ - if (info.language == "en" && tmp == "US_POSIX") { - info.language = "C"; - tmp.clear(); - } - else if (tmp.size() != 3u || - std::find_if_not(tmp.begin(), tmp.end(), is_numeric_ascii) != tmp.end()) - { - return false; - } - } - - info.country = tmp; - if (end >= input.size()) { - return true; - } - if (input[end] == '.') { - return parse_from_encoding(info, input.substr(end + 1)); - } - BLI_assert(input[end] == '@'); - return parse_from_variant(info, input.substr(end + 1)); -} - -static bool parse_from_script(Info &info, const std::string_view input) -{ - const auto end = input.find_first_of("-_@."); - std::string tmp(input.substr(0, end)); - /* Script is exactly 4 ASCII characters, otherwise it is not present. */ - if (tmp.length() != 4) { - return parse_from_country(info, input); - } - - for (char &c : tmp) { - if (!is_lower_ascii(c) && !make_lower_ascii(c)) { - return parse_from_country(info, input); - } - } - make_upper_ascii(tmp[0]); /* Capitalize first letter only. 
*/ - info.script = tmp; - - if (end >= input.size()) { - return true; - } - if (input[end] == '-' || input[end] == '_') { - return parse_from_country(info, input.substr(end + 1)); - } - if (input[end] == '.') { - return parse_from_encoding(info, input.substr(end + 1)); - } - BLI_assert(input[end] == '@'); - return parse_from_variant(info, input.substr(end + 1)); -} - -static bool parse_from_lang(Info &info, const std::string_view input) -{ - const auto end = input.find_first_of("-_@."); - std::string tmp(input.substr(0, end)); - if (tmp.empty()) { - return false; - } - for (char &c : tmp) { - if (!is_lower_ascii(c) && !make_lower_ascii(c)) { - return false; - } - } - if (tmp != "c" && tmp != "posix") { /* Keep default if C or POSIX. */ - info.language = tmp; - } - - if (end >= input.size()) { - return true; - } - if (input[end] == '-' || input[end] == '_') { - return parse_from_script(info, input.substr(end + 1)); - } - if (input[end] == '.') { - return parse_from_encoding(info, input.substr(end + 1)); - } - BLI_assert(input[end] == '@'); - return parse_from_variant(info, input.substr(end + 1)); -} - /* Info about a locale. */ -Info::Info(const StringRef locale_full_name) -{ - std::string locale_name(locale_full_name); +class Info { + public: + std::string language = "C"; + std::string script; + std::string country; + std::string variant; - /* If locale name not specified, try to get the appropriate one from the system. */ + Info(const StringRef locale_full_name) + { + std::string locale_name(locale_full_name); + + /* If locale name not specified, try to get the appropriate one from the system. 
*/ #if defined(__APPLE__) && !defined(WITH_HEADLESS) && !defined(WITH_GHOST_SDL) - if (locale_name.empty()) { - locale_name = macos_user_locale(); - } + if (locale_name.empty()) { + locale_name = macos_user_locale(); + } #endif - if (locale_name.empty()) { - const char *lc_all = BLI_getenv("LC_ALL"); - if (lc_all) { - locale_name = lc_all; - } - } - if (locale_name.empty()) { - const char *lang = BLI_getenv("LANG"); - if (lang) { - locale_name = lang; - } - } - -#ifdef _WIN32 - if (locale_name.empty()) { - char buf[128] = {}; - if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO639LANGNAME, buf, sizeof(buf)) != 0) { - locale_name = buf; - if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO3166CTRYNAME, buf, sizeof(buf)) != 0) { - locale_name += "_"; - locale_name += buf; + if (locale_name.empty()) { + const char *lc_all = BLI_getenv("LC_ALL"); + if (lc_all) { + locale_name = lc_all; + } + } + if (locale_name.empty()) { + const char *lang = BLI_getenv("LANG"); + if (lang) { + locale_name = lang; + } + } + +#ifdef _WIN32 + if (locale_name.empty()) { + char buf[128] = {}; + if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO639LANGNAME, buf, sizeof(buf)) != 0) { + locale_name = buf; + if (GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SISO3166CTRYNAME, buf, sizeof(buf)) != 0) { + locale_name += "_"; + locale_name += buf; + } } } - } #endif - parse_from_lang(*this, locale_name); -} + parse_from_lang(locale_name); + } -std::string Info::to_full_name() const -{ - std::string result = language; - if (!script.empty()) { - result += '_' + script; + std::string to_full_name() const + { + std::string result = language; + if (!script.empty()) { + result += '_' + script; + } + if (!country.empty()) { + result += '_' + country; + } + if (!variant.empty()) { + result += '@' + variant; + } + return result; } - if (!country.empty()) { - result += '_' + country; + + private: + /* Locale parsing. 
*/ + bool parse_from_variant(const std::string_view input) + { + if (language == "C" || input.empty()) { + return false; + } + variant = input; + /* No assumptions, just make it lowercase. */ + for (char &c : variant) { + make_lower_ascii(c); + } + return true; } - if (!variant.empty()) { - result += '@' + variant; + + bool parse_from_encoding(const std::string_view input) + { + const int64_t end = input.find_first_of('@'); + std::string tmp(input.substr(0, end)); + if (tmp.empty()) { + return false; + } + /* tmp contains encoding, we ignore it. */ + if (end >= input.size()) { + return true; + } + BLI_assert(input[end] == '@'); + return parse_from_variant(input.substr(end + 1)); } - return result; -} + + bool parse_from_country(const std::string_view input) + { + if (language == "C") { + return false; + } + + const int64_t end = input.find_first_of("@."); + std::string tmp(input.substr(0, end)); + if (tmp.empty()) { + return false; + } + + for (char &c : tmp) { + make_upper_ascii(c); + } + + /* If it's ALL uppercase ASCII, assume ISO 3166 country id. */ + if (std::find_if_not(tmp.begin(), tmp.end(), is_upper_ascii) != tmp.end()) { + /* else handle special cases: + * - en_US_POSIX is an alias for C + * - M49 country code: 3 digits */ + if (language == "en" && tmp == "US_POSIX") { + language = "C"; + tmp.clear(); + } + else if (tmp.size() != 3u || + std::find_if_not(tmp.begin(), tmp.end(), is_numeric_ascii) != tmp.end()) + { + return false; + } + } + + country = tmp; + if (end >= input.size()) { + return true; + } + if (input[end] == '.') { + return parse_from_encoding(input.substr(end + 1)); + } + BLI_assert(input[end] == '@'); + return parse_from_variant(input.substr(end + 1)); + } + + bool parse_from_script(const std::string_view input) + { + const int64_t end = input.find_first_of("-_@."); + std::string tmp(input.substr(0, end)); + /* Script is exactly 4 ASCII characters, otherwise it is not present. 
*/ + if (tmp.length() != 4) { + return parse_from_country(input); + } + + for (char &c : tmp) { + if (!is_lower_ascii(c) && !make_lower_ascii(c)) { + return parse_from_country(input); + } + } + make_upper_ascii(tmp[0]); /* Capitalize first letter only. */ + script = tmp; + + if (end >= input.size()) { + return true; + } + if (input[end] == '-' || input[end] == '_') { + return parse_from_country(input.substr(end + 1)); + } + if (input[end] == '.') { + return parse_from_encoding(input.substr(end + 1)); + } + BLI_assert(input[end] == '@'); + return parse_from_variant(input.substr(end + 1)); + } + + bool parse_from_lang(const std::string_view input) + { + const int64_t end = input.find_first_of("-_@."); + std::string tmp(input.substr(0, end)); + if (tmp.empty()) { + return false; + } + for (char &c : tmp) { + if (!is_lower_ascii(c) && !make_lower_ascii(c)) { + return false; + } + } + if (tmp != "c" && tmp != "posix") { /* Keep default if C or POSIX. */ + language = tmp; + } + + if (end >= input.size()) { + return true; + } + if (input[end] == '-' || input[end] == '_') { + return parse_from_script(input.substr(end + 1)); + } + if (input[end] == '.') { + return parse_from_encoding(input.substr(end + 1)); + } + BLI_assert(input[end] == '@'); + return parse_from_variant(input.substr(end + 1)); + } +}; /* .mo file reader. */ @@ -418,17 +426,17 @@ inline bool operator==(const MessageKeyRef &a, const MessageKey &b) return a.context_ == b.context_ && a.str_ == b.str_; } -/* std::locale facet for translation based on .mo files. */ +/* Messages translation based on .mo files. 
*/ -class MOMessageFacet : public MessageFacet { +class MOMessages { using Catalog = Map; Vector catalogs_; std::string error_; public: - MOMessageFacet(const Info &info, - const Vector &domains, - const Vector &paths) + MOMessages(const Info &info, + const Vector &domains, + const Vector &paths) { const Vector catalog_paths = get_catalog_paths(info, paths); for (size_t i = 0; i < domains.size(); i++) { @@ -444,9 +452,7 @@ class MOMessageFacet : public MessageFacet { } } - const char *translate(const int domain, - const StringRef context, - const StringRef str) const override + const char *translate(const int domain, const StringRef context, const StringRef str) const { if (domain < 0 || domain >= catalogs_.size()) { return nullptr; @@ -533,22 +539,47 @@ class MOMessageFacet : public MessageFacet { } }; -/* Install facet into std::locale. */ +/* Public API */ -std::locale::id MessageFacet::id; +static std::unique_ptr global_messages; +static std::string global_full_name; -std::locale MessageFacet::install(const std::locale &locale, - const Info &info, - const Vector &domains, - const Vector &paths) +void init(const StringRef locale_full_name, + const Vector &domains, + const Vector &paths) { - MOMessageFacet *facet = new MOMessageFacet(info, domains, paths); - if (!facet->error().empty()) { - throw std::runtime_error(facet->error()); - return locale; + Info info(locale_full_name); + if (global_full_name == info.to_full_name()) { + return; } - return std::locale(locale, facet); + global_messages = std::make_unique(info, domains, paths); + global_full_name = info.to_full_name(); + + if (!global_messages->error().empty()) { + printf("bl_locale_set(%s): %s\n", global_full_name.c_str(), global_messages->error().c_str()); + free(); + } +} + +void free() +{ + global_messages.reset(); + global_full_name = ""; +} + +const char *translate(const int domain, const StringRef context, const StringRef key) +{ + if (!global_messages) { + return nullptr; + } + + return 
global_messages->translate(domain, context, key); +} + +const char *full_name() +{ + return global_full_name.c_str(); } } // namespace blender::locale diff --git a/intern/locale/messages.h b/intern/locale/messages.h index 3a0f933406b..916ec1bbf80 100644 --- a/intern/locale/messages.h +++ b/intern/locale/messages.h @@ -3,7 +3,6 @@ * * Adapted from boost::locale */ -#include #include #include "BLI_string_ref.hh" @@ -11,34 +10,16 @@ namespace blender::locale { -/* Info about a locale. */ -struct Info { - Info(const StringRef locale_full_name); +void init(const StringRef locale_full_name, /* Locale name. */ + const Vector &domains, /* Application names. */ + const Vector &paths); /* Search paths for .mo files. */ +void free(); - std::string language = "C"; - std::string script; - std::string country; - std::string variant; - - std::string to_full_name() const; +const char *translate(const int domain, const StringRef context, const StringRef key); +const char *full_name(); #if defined(__APPLE__) && !defined(WITH_HEADLESS) && !defined(WITH_GHOST_SDL) - static std::string macos_user_locale(); +std::string macos_user_locale(); #endif -}; - -/* Message facet to install into std::locale for translation. */ -class MessageFacet : public std::locale::facet { - public: - static std::locale::id id; - static std::locale install(const std::locale &locale, - const Info &info, - const Vector &domains, /* Application names. */ - const Vector &paths); /* Search paths for .mo files. */ - - virtual const char *translate(const int domain, - const StringRef context, - const StringRef key) const = 0; -}; } // namespace blender::locale diff --git a/intern/locale/messages_apple.mm b/intern/locale/messages_apple.mm index c24b9777163..c818297c8c0 100644 --- a/intern/locale/messages_apple.mm +++ b/intern/locale/messages_apple.mm @@ -17,7 +17,7 @@ namespace blender::locale { #if !defined(WITH_HEADLESS) && !defined(WITH_GHOST_SDL) /* Get current locale. 
*/ -std::string Info::macos_user_locale() +std::string macos_user_locale() { std::string result; diff --git a/source/blender/blentranslation/intern/blt_lang.cc b/source/blender/blentranslation/intern/blt_lang.cc index 90142e1197b..c15db05e245 100644 --- a/source/blender/blentranslation/intern/blt_lang.cc +++ b/source/blender/blentranslation/intern/blt_lang.cc @@ -227,8 +227,8 @@ void BLT_lang_init() void BLT_lang_free() { #ifdef WITH_INTERNATIONAL + bl_locale_free(); free_locales(); -#else #endif }