Files
test2/release/scripts/modules/i18n/utils.py
Bastien Montagne 2d8a9a0cc3 Adding (moving from bf-translation) the i18n python module. This will make it available for future "UI Translation" addon, and probably other UI-related tools as well.
Notes:
* This is a somewhat reworked version of what is currently in bf-translation's trunk/po/tools, not yet fully functional (well, 95% is ;) ) nor fully tested. Ultimately, it will replace it (being "svn-linked" in bf-translation).
* Added feature: more complete/strict tests (yet some work to be done here).
* Added spell checking (huge spellcheck commit incoming...).
* Trying to get rid of xgettext itself (should e.g. allow us to use #defines as contexts, among other things...). But currently captures less strings, work needed here too.

Please note this includes libfribidi.dll, as it is hard to find it for windows (unixes should have no problems here).
2012-07-02 19:51:06 +00:00

378 lines
14 KiB
Python

# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
# ***** END GPL LICENSE BLOCK *****
# <pep8 compliant>
# Some misc utilities...
import os
import sys
import collections
from codecs import open
import settings
# Module-level shortcuts to values defined in the project's settings module.
# Prefix found at the start of the msgctxt/msgid/msgstr lines of
# commented-out entries in .po files (see parse_messages/write_messages).
COMMENT_PREFIX = settings.COMMENT_PREFIX
# NOTE(review): the two values below are not used in the visible part of
# this file; presumably they control the "msgid not capitalized" warning
# and its list of allowed exceptions -- confirm against settings.py.
WARN_NC = settings.WARN_MSGID_NOT_CAPITALIZED
NC_ALLOWED = settings.WARN_MSGID_NOT_CAPITALIZED_ALLOWED
def stripeol(s):
    """Return s without any trailing end-of-line characters (CR and/or LF)."""
    return s.rstrip("\r\n")
# XXX Heuristic only: for now, every message strictly longer than
#     30 characters is assumed to be a tooltip!
def is_tooltip(msgid):
    """Tell whether msgid is considered a tooltip (more than 30 chars long)."""
    return 30 < len(msgid)
def parse_messages(fname):
    """
    Parse the .po file fname (read as utf-8).

    Returns a tuple (messages, states, stats).
    messages is an ordered dict of dicts
        {(msgctxt, msgid): {msgid_lines:, msgstr_lines:,
                            comment_lines:, msgctxt_lines:}}.
    states is a dict of three sets of (msgctxt, msgid) keys, and a boolean
    flag indicating the .po is somewhat broken
        {trans_msg:, fuzzy_msg:, comm_msg:, is_broken:}.
    stats is a dict of values
        {tot_msg:, trans_msg:, tot_ttips:, trans_ttips:, comm_msg:,
         nbr_signs:, nbr_trans_signs:, contexts: set()}.

    Entries are separated by empty lines. When the same (msgctxt, msgid)
    pair appears several times, only the first occurrence is kept. Empty
    lines that do not close any entry (leading or repeated empty lines) are
    ignored.

    Note: This function will silently "arrange" mis-formatted entries, thus
          using afterward write_messages() should always produce a po-valid
          file, though not correct!
    """
    # Global statistics.
    tot_messages = 0
    tot_tooltips = 0
    trans_messages = 0
    trans_tooltips = 0
    comm_messages = 0
    nbr_signs = 0
    nbr_trans_signs = 0
    contexts = set()

    # State of the entry currently being read.
    reading_msgid = False
    reading_msgstr = False
    reading_msgctxt = False
    is_translated = False
    is_fuzzy = False
    is_commented = False
    is_broken = False
    msgid_lines = []
    msgstr_lines = []
    msgctxt_lines = []
    comment_lines = []

    messages = getattr(collections, 'OrderedDict', dict)()
    translated_messages = set()
    fuzzy_messages = set()
    commented_messages = set()

    prefix_len = len(COMMENT_PREFIX)

    def clean_vars():
        """Reset the per-entry state, ready to read a new entry."""
        nonlocal reading_msgid, reading_msgstr, reading_msgctxt, \
                 is_fuzzy, is_translated, is_commented, \
                 msgid_lines, msgstr_lines, msgctxt_lines, comment_lines
        reading_msgid = reading_msgstr = reading_msgctxt = False
        is_fuzzy = is_translated = is_commented = False
        # New lists (not .clear()): the old ones are now owned by messages.
        msgid_lines = []
        msgstr_lines = []
        msgctxt_lines = []
        comment_lines = []

    def finalize_message():
        """Store the entry read so far, update the stats, reset the state."""
        nonlocal tot_messages, tot_tooltips, trans_messages, \
                 trans_tooltips, comm_messages, nbr_signs, nbr_trans_signs

        # Nothing was read since last entry (e.g. several consecutive empty
        # lines): do not generate a bogus empty entry, which would use the
        # same ("", "") key as the header.
        if not (msgid_lines or msgstr_lines or
                msgctxt_lines or comment_lines):
            clean_vars()
            return

        msgid = "".join(msgid_lines)
        msgctxt = "".join(msgctxt_lines)
        msgkey = (msgctxt, msgid)
        is_ttip = is_tooltip(msgid)

        # Never allow overriding existing msgid/msgctxt pairs!
        if msgkey in messages:
            clean_vars()
            return

        nbr_signs += len(msgid)
        if is_commented:
            commented_messages.add(msgkey)
        elif is_fuzzy:
            fuzzy_messages.add(msgkey)
        elif is_translated:
            translated_messages.add(msgkey)
            nbr_trans_signs += len("".join(msgstr_lines))
        messages[msgkey] = {"msgid_lines"  : msgid_lines,
                            "msgstr_lines" : msgstr_lines,
                            "comment_lines": comment_lines,
                            "msgctxt_lines": msgctxt_lines}

        if is_commented:
            comm_messages += 1
        else:
            tot_messages += 1
            if is_ttip:
                tot_tooltips += 1
            if not is_fuzzy and is_translated:
                trans_messages += 1
                if is_ttip:
                    trans_tooltips += 1
            contexts.add(msgctxt)

        clean_vars()

    with open(fname, 'r', "utf-8") as f:
        for line in f:
            line = stripeol(line)
            if line == "":
                finalize_message()

            elif line.startswith(("msgctxt", COMMENT_PREFIX + "msgctxt")):
                reading_msgctxt = True
                # Strip the 'msgctxt "' head and the closing quote.
                if line.startswith(COMMENT_PREFIX):
                    is_commented = True
                    line = line[9 + prefix_len:-1]
                else:
                    line = line[9:-1]
                msgctxt_lines.append(line)

            elif line.startswith(("msgid", COMMENT_PREFIX + "msgid")):
                reading_msgctxt = False
                reading_msgid = True
                # Strip the 'msgid "' head and the closing quote.
                if line.startswith(COMMENT_PREFIX):
                    is_commented = True
                    line = line[7 + prefix_len:-1]
                else:
                    line = line[7:-1]
                msgid_lines.append(line)

            elif line.startswith(("msgstr", COMMENT_PREFIX + "msgstr")):
                # A msgstr must directly follow a msgid.
                if not reading_msgid:
                    is_broken = True
                else:
                    reading_msgid = False
                reading_msgctxt = False
                reading_msgstr = True
                # Strip the 'msgstr "' head and the closing quote. The
                # commented state of msgstr must match the one of msgid.
                if line.startswith(COMMENT_PREFIX):
                    line = line[8 + prefix_len:-1]
                    if not is_commented:
                        is_broken = True
                else:
                    line = line[8:-1]
                    if is_commented:
                        is_broken = True
                msgstr_lines.append(line)
                if line:
                    is_translated = True

            elif line.startswith("#"):
                if reading_msgid:
                    if is_commented:
                        # Continuation line of a commented msgid.
                        msgid_lines.append(line[1 + prefix_len:-1])
                    else:
                        msgid_lines.append(line)
                        is_broken = True
                elif reading_msgstr:
                    if is_commented:
                        # Continuation line of a commented msgstr.
                        msgstr_lines.append(line[1 + prefix_len:-1])
                    else:
                        msgstr_lines.append(line)
                        is_broken = True
                elif reading_msgctxt and is_commented:
                    # Continuation line of a commented msgctxt.
                    msgctxt_lines.append(line[1 + prefix_len:-1])
                else:
                    if line.startswith("#, fuzzy"):
                        is_fuzzy = True
                    else:
                        comment_lines.append(line)

            else:
                # Continuation line ('"some text"'): strip both quotes.
                if reading_msgid:
                    msgid_lines.append(line[1:-1])
                elif reading_msgstr:
                    line = line[1:-1]
                    msgstr_lines.append(line)
                    if not is_translated and line:
                        is_translated = True
                elif reading_msgctxt:
                    msgctxt_lines.append(line[1:-1])
                else:
                    is_broken = True

        # If no final empty line, last message is not finalized!
        if reading_msgstr:
            finalize_message()

    return (messages,
            {"trans_msg": translated_messages,
             "fuzzy_msg": fuzzy_messages,
             "comm_msg" : commented_messages,
             "is_broken": is_broken},
            {"tot_msg"        : tot_messages,
             "trans_msg"      : trans_messages,
             "tot_ttips"      : tot_tooltips,
             "trans_ttips"    : trans_tooltips,
             "comm_msg"       : comm_messages,
             "nbr_signs"      : nbr_signs,
             "nbr_trans_signs": nbr_trans_signs,
             "contexts"       : contexts})
def write_messages(fname, messages, commented, fuzzy):
    """
    Write in fname file (utf-8) the content of messages (same layout as the
    messages dict returned by parse_messages).
    commented and fuzzy are two sets of message keys, as used in messages.
    Entries whose key is in commented get each msgctxt/msgid/msgstr line
    prefixed with COMMENT_PREFIX.
    Returns the number of written messages.
    """
    written = 0
    with open(fname, 'w', "utf-8") as out:
        for key, entry in messages.items():
            ctxt, _msgid = key
            # Commented entries only differ by the prefix put in front of
            # every keyword and continuation line.
            lead = COMMENT_PREFIX if key in commented else ""
            glue = "\"\n" + lead + "\""

            chunks = ["\n".join(entry["comment_lines"])]
            # Only mark as fuzzy if msgstr is not empty!
            if key in fuzzy and "".join(entry["msgstr_lines"]):
                chunks.append("\n#, fuzzy")
            if ctxt:
                chunks += ["\n", lead, "msgctxt \"",
                           glue.join(entry["msgctxt_lines"]), "\""]
            chunks += ["\n", lead, "msgid \"",
                       glue.join(entry["msgid_lines"]),
                       "\"\n", lead, "msgstr \"",
                       glue.join(entry["msgstr_lines"]),
                       "\"\n\n"]

            out.write("".join(chunks))
            written += 1
    return written
def gen_empty_messages(blender_rev, time_str, year_str):
    """
    Generate empty messages & states data, containing only the po header.

    blender_rev is inserted in the "Project-Id-Version" header field,
    time_str in the "POT-Creation-Date" one, and year_str in the copyright
    comment line.

    Returns a tuple (messages, states), using the same layout as the two
    first values returned by parse_messages. The header entry, of key
    ("", ""), is flagged as fuzzy.
    """
    header_key = ("", "")

    messages = getattr(collections, 'OrderedDict', dict)()
    messages[header_key] = {
        "msgid_lines": [""],
        "msgctxt_lines": [],
        "msgstr_lines": [
            "Project-Id-Version: Blender r{}\\n"
            "".format(blender_rev),
            "Report-Msgid-Bugs-To: \\n",
            "POT-Creation-Date: {}\\n"
            "".format(time_str),
            "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n",
            "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n",
            "Language-Team: LANGUAGE <LL@li.org>\\n",
            "Language: \\n",
            "MIME-Version: 1.0\\n",
            "Content-Type: text/plain; charset=UTF-8\\n",
            "Content-Transfer-Encoding: 8bit\\n"
        ],
        "comment_lines": [
            "# Blender's translation file (po format).",
            "# Copyright (C) {} The Blender Foundation."
            "".format(year_str),
            # A single comment line: the two literals are concatenated, so
            # the second one must not start with its own "# ".
            "# This file is distributed under the same "
            "license as the Blender package.",
            "# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.",
            "#",
        ],
    }

    states = {"trans_msg": set(),
              "fuzzy_msg": {header_key},
              "comm_msg": set(),
              "is_broken": False}

    return messages, states
def print_stats(stats, glob_stats=None, prefix=""):
    """
    Print out some stats about a po file.

    stats is a stats dict as returned by parse_messages.
    glob_stats is for making global stats over several po's: when given
    (and not empty), the values computed for this po are accumulated into
    it. Its "nbr_signs" value is only set while it is still 0.
    prefix is put in front of each printed line.
    Always returns 0.
    """
    tot_msgs = stats["tot_msg"]
    trans_msgs = stats["trans_msg"]
    tot_ttips = stats["tot_ttips"]
    trans_ttips = stats["trans_ttips"]
    comm_msgs = stats["comm_msg"]
    nbr_signs = stats["nbr_signs"]
    nbr_trans_signs = stats["nbr_trans_signs"]
    contexts = stats["contexts"]

    # All ratios default to 0.0 when their denominator is null.
    lvl = lvl_ttips = lvl_trans_ttips = lvl_ttips_in_trans = lvl_comm = 0.0

    if tot_msgs > 0:
        lvl = trans_msgs / tot_msgs
        lvl_ttips = tot_ttips / tot_msgs
    # Commented ratio is relative to all messages (commented ones included),
    # so it must also be computed when there are only commented messages.
    if tot_msgs + comm_msgs > 0:
        lvl_comm = comm_msgs / (tot_msgs + comm_msgs)
    if tot_ttips > 0:
        lvl_trans_ttips = trans_ttips / tot_ttips
    if trans_msgs > 0:
        lvl_ttips_in_trans = trans_ttips / trans_msgs

    if glob_stats:
        glob_stats["nbr"] += 1.0
        glob_stats["lvl"] += lvl
        glob_stats["lvl_ttips"] += lvl_ttips
        glob_stats["lvl_trans_ttips"] += lvl_trans_ttips
        glob_stats["lvl_ttips_in_trans"] += lvl_ttips_in_trans
        glob_stats["lvl_comm"] += lvl_comm
        glob_stats["nbr_trans_signs"] += nbr_trans_signs
        if glob_stats["nbr_signs"] == 0:
            glob_stats["nbr_signs"] = nbr_signs
        glob_stats["contexts"] |= contexts

    # The leading empty string makes join() put prefix in front of every line.
    lines = ("",
             "{:>6.1%} done! ({} translated messages over {}).\n"
             "".format(lvl, trans_msgs, tot_msgs),
             "{:>6.1%} of messages are tooltips ({} over {}).\n"
             "".format(lvl_ttips, tot_ttips, tot_msgs),
             "{:>6.1%} of tooltips are translated ({} over {}).\n"
             "".format(lvl_trans_ttips, trans_ttips, tot_ttips),
             "{:>6.1%} of translated messages are tooltips ({} over {}).\n"
             "".format(lvl_ttips_in_trans, trans_ttips, trans_msgs),
             "{:>6.1%} of messages are commented ({} over {}).\n"
             "".format(lvl_comm, comm_msgs, comm_msgs + tot_msgs),
             "This translation is currently made of {} signs.\n"
             "".format(nbr_trans_signs))
    print(prefix.join(lines))
    return 0