diff --git a/doc/lua_api.md b/doc/lua_api.md index 6fd3cb4f3..286c3b873 100644 --- a/doc/lua_api.md +++ b/doc/lua_api.md @@ -4006,8 +4006,9 @@ Translations Texts can be translated client-side with the help of `minetest.translate` and translation files. -Consider using the tool [update_translations](https://github.com/minetest-tools/update_translations) -to generate and update translation files automatically from the Lua source. +Consider using the script `util/mtt_update.py` in the Minetest repository +to generate and update translation files automatically from the Lua sources. +See `util/README_mtt_update.md` for an explanation. Translating a string -------------------- diff --git a/util/README_mtt_update.md b/util/README_mtt_update.md new file mode 100644 index 000000000..9fed19981 --- /dev/null +++ b/util/README_mtt_update.md @@ -0,0 +1,213 @@ +# `mtt_update.py`—Minetest Translation Updater + +This Python script is intended for use with localized Minetest mods, i.e., mods that use +`*.tr` files and contain translatable strings of the form `S("This string can be translated")`. +It extracts the strings from the mod's source code and updates the localization files +accordingly. It can also be used to update the `*.tr` files in Minetest's `builtin` component. + +## Preparing your source code + +This script makes assumptions about your source code. Before it is usable, you first have +to prepare your source code accordingly. + +### Choosing the textdomain name + +It is recommended to set the textdomain name (for `minetest.get_translator`) to be identical +to the mod name, as the script will automatically detect it. If the textdomain name differs, +you may have to manually change the `# textdomain:` line of newly generated files. + +**Note:** In each `*.tr` file, there **must** be only one textdomain. Multiple textdomains in +the same file are not supported by this script and any additional textdomain line will be +removed. 
+ +### Defining the helper functions + +In any source code file with translatable strings, you have to manually define helper +functions at the top with something like `local S = minetest.get_translator("<textdomain>")`. +Optionally, you can also define additional helper functions `FS`, `NS` and `NFS` if needed. + +Here is the list of all recognized function names. All functions return a string. + +* `S`: Returns translation of input. See Minetest's `lua_api.md`. You should always have at + least this function defined. +* `NS`: Returns the input. Useful to make a string visible to the script without actually + translating it here. +* `FS`: Same as `S`, but returns a formspec-escaped version of the translation of the input. + Supported for convenience. +* `NFS`: Returns a formspec-escaped version of the input, but not translated. + Supported for convenience. + +Here is the boilerplate code you have to add at the top of your source code file: + + local S = minetest.get_translator("<textdomain>") + local NS = function(s) return s end + local FS = function(...) return minetest.formspec_escape(S(...)) end + local NFS = function(s) return minetest.formspec_escape(s) end + +Replace `<textdomain>` above and optionally delete `NS`, `FS` and/or `NFS` if you don't need +them. + +### Preparing the strings + +This script can detect translatable strings written in the notations listed below. +Additional function arguments following a literal string are ignored. + +* `S("literal")`: one literal string enclosed by the delimiters + `"..."`, `'...'` or `[[...]]` +* `S("foo " .. 'bar ' .. "baz")`: concatenation of multiple literal strings. Line + breaks are accepted. + +The `S` may also be `NS`, `FS` or `NFS` (see above). + +Undetectable notations: + +* `S"literal"`: omitted function brackets +* `S(variable)`: requires the use of `NS`. See example below. +* `S("literal " .. variable)`: non-static content. + Use placeholders (`@1`, ...) for variable text. 
+* Any literal string concatenation using `[[...]]` + +### A minimal example + +This minimal code example sends "Hello world!" to all players, but translated according to +each player's language: + + local S = minetest.get_translator("example") + minetest.chat_send_all(S("Hello world!")) + +### How to use `NS` + +The reason why `NS` exists is for cases like this: Sometimes, you want to define a list of +strings so they can be output later in a function. Like so: + + local fruit = { "Apple", "Orange", "Pear" } + local function return_fruit(fruit_id) + return fruit[fruit_id] + end + +If you want to translate the fruit names when `return_fruit` is run, but keep the +*untranslated* fruit names stored in the `fruit` table, this is where `NS` will help. +It will show the script the string without Minetest translating it. The code could be made +translatable like this: + + local fruit = { NS("Apple"), NS("Orange"), NS("Pear") } + local function return_fruit(fruit_id) + return S(fruit[fruit_id]) + end + +## How to run the script + +First, change the working directory to the directory of the mod whose files you want to +update. From this directory, run the script. + +When you run the script, it will update the `template.txt` and any `*.tr` files present +in that mod's `/locale` folder. If the `/locale` folder or `template.txt` file don't +exist yet, they will be created. + +This script will also work in the root directory of a modpack. It will run on each mod +inside the modpack in that situation. Alternatively, you can run the script to update +the files of all mods in subdirectories with the `-r` option, which is useful to update +the locale files in an entire game. + +It has the following command line options: + + mtt_update.py [OPTIONS] [PATHS...] 
+ + --help, -h: prints this help message + --recursive, -r: run on all subfolders of paths given + --old-file, -o: create copies of files before updating them, named `<FILE NAME>.old` + --break-long-lines, -b: add extra line-breaks before and after long strings + --print-source, -p: add comments denoting the source file + --verbose, -v: add output information + --truncate-unused, -t: delete unused strings from files + +## Script output + +This section explains how the output of this script works, roughly. This script aims to make +the output more or less stable, i.e. given identical source files and arguments, the script +should produce the same output. + +### Textdomain + +The script will add (if not already present) a `# textdomain: <modname>` at the top, where +`<modname>` is identical to the mod directory name. If a `# textdomain` already exists, it +will be moved to the top, with the textdomain name being left intact (even if it differs +from the mod name). + +**Note:** If there are multiple `# textdomain:` lines in the file, all of them except the +first one will be deleted. This script only supports one textdomain per `*.tr` file. + +### Strings + +The order of the strings is deterministic and follows certain rules: First, all strings are +grouped by the source `*.lua` file. The files are loaded in alphabetical order. In case of +subdirectories, the mod's root directory takes precedence, then the directories are traversed +in top-down alphabetical order. Second, within each file, the strings are then inserted in +the same order as they appear in the source code. + +If a string appears multiple times in the source code, the string will be added only where it +was first found. + +Don't bother to manually organize the order of the lines in the file yourself because the +script will just reorder everything. 
+ +If the mod's source changes in such a way that a line with an existing translation or comment +is no longer present, and `--truncate-unused` or `-t` is *not* provided as an argument, the +unused line will be moved to the bottom of the translation file under a special comment: + + ##### not used anymore ##### + +This allows for old translations and comments to be reused with new lines where appropriate. +This script doesn't attempt "fuzzy" matching of old strings to new, so even a single change +of punctuation or spelling will put strings into the "not used anymore" section and require +manual re-association with the new string. + +### Comments + +The script will preserve any comments in an existing `template.txt` or the various `*.tr` +files, associating them with the line that follows them. So for example: + + # This comment pertains to Some Text + Some text= + + # Multi-line comments + # are also supported + Text as well= + +There are also a couple of special comments that this script gives special treatment to. + +#### Source file comments + +If `--print-source` or `-p` is provided as an option, the script will insert comments to show +which file or files each string has come from. +This is the syntax of such a comment: + + ##[ file.lua ]## + +This comment means that all lines following it belong to the file `file.lua`. In the special +case that the same string was found in multiple files, multiple file name comments will be used in a +row, like so: + + ##[ file1.lua ]## + ##[ file2.lua ]## + ##[ file3.lua ]## + example=Beispiel + +This means the string "example" was found in the files `file1.lua`, `file2.lua` and +`file3.lua`. + +If the print source option is not provided, these comments will disappear. + +Note that all comments of the form `##[something]##` will be treated as "source file" comments +so they may be moved, changed or removed by the script at will. 
+ +#### "not used anymore" section + +By default, the exact comment `##### not used anymore #####` will be automatically added to +mark the beginning of a section where old/unused strings will go. Leave the exact wording of +this comment intact so this line can be moved (or removed) properly in subsequent runs. + +## Updating `builtin` + +To update the `builtin` component of Minetest, change the working directory to `builtin` of +the Minetest source code repository, then run this script from there. diff --git a/util/mtt_update.py b/util/mtt_update.py new file mode 100755 index 000000000..a6b3286b3 --- /dev/null +++ b/util/mtt_update.py @@ -0,0 +1,466 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Script to generate Minetest translation template files and update +# translation files. +# +# Copyright (C) 2019 Joachim Stolberg, 2020 FaceDeer, 2020 Louis Royer, +# 2023 Wuzzy. +# License: LGPLv2.1 or later (see LICENSE file for details) + +from __future__ import print_function +import os, fnmatch, re, shutil, errno +from sys import argv as _argv +from sys import stderr as _stderr + +# Running params +params = {"recursive": False, + "help": False, + "verbose": False, + "folders": [], + "old-file": False, + "break-long-lines": False, + "print-source": False, + "truncate-unused": False, +} +# Available CLI options +options = {"recursive": ['--recursive', '-r'], + "help": ['--help', '-h'], + "verbose": ['--verbose', '-v'], + "old-file": ['--old-file', '-o'], + "break-long-lines": ['--break-long-lines', '-b'], + "print-source": ['--print-source', '-p'], + "truncate-unused": ['--truncate-unused', '-t'], +} + +# Strings longer than this will have extra space added between +# them in the translation files to make it easier to distinguish their +# beginnings and endings at a glance +doublespace_threshold = 80 + +# These symbols mark comment lines showing the source file name. +# A comment may look like "##[ init.lua ]##". 
symbol_source_prefix = "##["
symbol_source_suffix = "]##"

# Comment that marks the section of old/unused strings.
comment_unused = "##### not used anymore #####"


def set_params_folders(tab: list):
    '''Initialize params["folders"] from CLI arguments.

    Every argument after the tool name (index 0) that is not one of the
    spellings listed in ``options`` is treated as a path and appended,
    as an absolute path, to ``params["folders"]``.
    '''
    known_flags = {flag for flags in options.values() for flag in flags}
    for arg in tab[1:]:
        if arg not in known_flags:
            params["folders"].append(os.path.abspath(arg))


def set_params(tab: list):
    '''Initialize the boolean entries of params from CLI arguments.

    An option is switched on when any of its spellings occurs in ``tab``.
    '''
    for option, flags in options.items():
        if any(flag in tab for flag in flags):
            params[option] = True


def print_help(name):
    '''Print the synopsis and the list of command line options.

    name -- program name shown in the synopsis line
    '''
    # NOTE(review): the indentation inside the help text could not be
    # recovered from the flattened source; tab indentation is assumed.
    descriptions = (
        ("help", "prints this help message"),
        ("recursive", "run on all subfolders of paths given"),
        ("old-file", "create *.old files"),
        ("break-long-lines", "add extra line breaks before and after long strings"),
        ("print-source", "add comments denoting the source file"),
        ("verbose", "add output information"),
        ("truncate-unused", "delete unused strings from files"),
    )
    lines = ["SYNOPSIS", f"\t{name} [OPTIONS] [PATHS...]", "DESCRIPTION"]
    for option, description in descriptions:
        lines.append(f"\t{', '.join(options[option])}")
        lines.append(f"\t\t{description}")
    # The help text ends with an empty line.
    print("\n".join(lines) + "\n")


def main():
    '''Parse the command line and update every requested folder.

    Without any path argument the current working directory is used.
    '''
    set_params(_argv)
    set_params_folders(_argv)
    if params["help"]:
        print_help(_argv[0])
        return

    folders = params["folders"]
    print("Running ", end='')
    if params["recursive"]:
        print("recursively ", end='')
    if len(folders) >= 2:
        print("on folder list:", folders)
    else:
        # No path given: fall back to the current working directory.
        folders = folders or [os.path.abspath("./")]
        print("on folder", folders[0])

    process = run_all_subfolders if params["recursive"] else update_folder
    for folder in folders:
        process(folder)


# Group 2 will be the string, groups 1 and 3 will be the delimiters (" or ')
# See https://stackoverflow.com/questions/46967465/regex-match-text-in-either-single-or-double-quote
pattern_lua_quoted = re.compile(r'[\.=^\t,{\(\s]N?F?S\s*\(\s*(["\'])((?:\\\1|(?:(?!\1)).)*)(\1)[\s,\)]', re.DOTALL)
# Handles the [[ ... ]] string delimiters
pattern_lua_bracketed = re.compile(r'[\.=^\t,{\(\s]N?F?S\s*\(\s*\[\[(.*?)\]\][\s,\)]', re.DOTALL)

# Handles "concatenation" .. " of strings"
pattern_concat = re.compile(r'["\'][\s]*\.\.[\s]*["\']', re.DOTALL)

pattern_tr = re.compile(r'(.*?[^@])=(.*)')
pattern_name = re.compile(r'^name[ ]*=[ ]*([^ \n]*)')
pattern_tr_filename = re.compile(r'\.tr$')

# Matches either a complete, valid translation escape (group 1 is set) or a
# lone "@" / "=" that still has to be escaped for use in a *.tr file.
pattern_at_or_equals = re.compile(r'@([@=0-9n])|[@=]')


def get_modname(folder):
    '''Return the mod name for ``folder``, or None if it cannot be determined.

    The name is read from the ``name`` field of mod.conf. If mod.conf does
    not exist, the folder name is used instead ("builtin" is mapped to
    "__builtin"), unless the folder contains a modpack.txt.
    '''
    try:
        with open(os.path.join(folder, "mod.conf"), "r", encoding='utf-8') as mod_conf:
            for line in mod_conf:
                match = pattern_name.match(line)
                if match:
                    return match.group(1)
    except FileNotFoundError:
        if os.path.isfile(os.path.join(folder, "modpack.txt")):
            return None
        folder_name = os.path.basename(folder)
        # Special case when run in Minetest's builtin directory
        if folder_name == "builtin":
            return "__builtin"
        return folder_name
    # mod.conf exists but has no "name" field
    return None


def get_existing_tr_files(folder):
    '''Return the sorted names of the .tr files directly inside <folder>/locale.

    Only the top level of the locale directory is inspected: callers join
    the returned names onto <folder>/locale/, so a file from a nested
    directory would resolve to the wrong path. A missing locale directory
    yields an empty list.
    '''
    locale_dir = os.path.join(folder, 'locale/')
    try:
        with os.scandir(locale_dir) as entries:
            return sorted(
                entry.name for entry in entries
                if entry.is_file() and pattern_tr_filename.search(entry.name)
            )
    except (FileNotFoundError, NotADirectoryError):
        return []


def mkdir_p(path):
    '''Create directory ``path`` including parents; do nothing if it exists.

    Raises OSError if ``path`` exists but is not a directory.
    '''
    os.makedirs(path, exist_ok=True)


def strings_to_text(dkeyStrings, dOld, mod_name, header_comments, textdomain):
    '''Convert the template dictionary to the text of a translation file.

    dkeyStrings     -- dict mapping each localized string to the list of
                       its source file comment lines
    dOld            -- dict of existing translations and comments from the
                       previous version of the file
    mod_name        -- used for the textdomain line if ``textdomain`` is None
    header_comments -- comment block to put below the textdomain line, or None
    textdomain      -- complete "# textdomain: ..." line to reuse, or None

    Returns the complete file content, terminated by a newline.
    '''
    if textdomain is not None:
        lines = [textdomain]  # argument is the full textdomain line
    else:
        lines = [f"# textdomain: {mod_name}"]
    if header_comments is not None:
        lines.append(header_comments)

    break_long = params["break-long-lines"]

    # Group the strings by the (joined) list of source files they occur in.
    grouped = {}
    for key, sources in dkeyStrings.items():
        grouped.setdefault("\n".join(sources), []).append(key)

    for source in sorted(grouped):
        if params["print-source"]:
            if lines[-1] != "":
                lines.append("")
            lines.append(source)
        for key in grouped[source]:
            old = dOld.get(key, {})
            translation = old.get("translation", "")
            comment = old.get("comment")
            is_long = break_long and len(key) > doublespace_threshold
            if is_long and lines[-1] != "":
                lines.append("")
            if comment and not comment.startswith("# textdomain:"):
                lines.append(comment)
            lines.append(f"{key}={translation}")
            if is_long:
                lines.append("")

    if not params["truncate-unused"]:
        marker_written = False
        for key, old in dOld.items():
            if key in dkeyStrings:
                continue
            translation = old.get("translation")
            comment = old.get("comment")
            # only keep an unused translation if there was translated
            # text or a comment associated with it
            if translation is None or not (translation != "" or comment):
                continue
            if not marker_written:
                marker_written = True
                lines.append("\n\n" + comment_unused + "\n")
            is_long = break_long and len(key) > doublespace_threshold
            if is_long and lines[-1] != "":
                lines.append("")
            if comment is not None:
                lines.append(comment)
            lines.append(f"{key}={translation}")
            if is_long:
                lines.append("")
    return "\n".join(lines) + '\n'


def write_template(templ_file, dkeyStrings, mod_name):
    '''Write the template.txt file ``templ_file``.

    dkeyStrings is the dictionary built by generate_template. Comments,
    header and textdomain of an already existing template are preserved.
    '''
    # read existing template file to preserve comments
    old_entries, _old_text, header_comments, textdomain = import_tr_file(templ_file)

    text = strings_to_text(dkeyStrings, old_entries, mod_name, header_comments, textdomain)
    mkdir_p(os.path.dirname(templ_file))
    with open(templ_file, "wt", encoding='utf-8') as template_file:
        template_file.write(text)


def _escape_at_or_equals(match):
    '''Escape a lone "@" or "=" and keep valid "@" escapes untouched.'''
    if match.group(1) is not None:
        return match.group(0)
    return "@" + match.group(0)


def read_lua_file_strings(lua_file):
    '''Return all translatable strings of a Lua file, escaped for *.tr files.

    Quoted strings are returned first, followed by [[bracketed]] strings,
    each group in source order.
    '''
    with open(lua_file, encoding='utf-8') as text_file:
        text = text_file.read()

    # Merge "literal" .. "concatenations" into a single literal.
    text = re.sub(pattern_concat, "", text)

    strings = [found[1] for found in pattern_lua_quoted.findall(text)]
    strings.extend(pattern_lua_bracketed.findall(text))

    escaped = []
    for s in strings:
        s = re.sub(r'"\.\.\s+"', "", s)
        # Escape stray "@" and "=" in one tokenizing pass, so that the
        # character following a stray "@" is kept and the existing escapes
        # (@@, @=, @n, @0..@9) are not escaped a second time.
        s = pattern_at_or_equals.sub(_escape_at_or_equals, s)
        s = s.replace('\\"', '"')
        s = s.replace("\\'", "'")
        s = s.replace("\n", "@n")
        s = s.replace("\\n", "@n")
        escaped.append(s)
    return escaped


def import_tr_file(tr_file):
    '''Read an existing translation (or template) file.

    Returns a tuple (entries, text, header_comments, textdomain):
    entries         -- dict mapping each source string to a dict with the
                       key "translation" and, if a comment block preceded
                       the line, "comment"
    text            -- full original file content, for comparison with the
                       newly generated output (None if the file is missing)
    header_comments -- comment lines found before the first translation
                       line, joined by newlines (None if there are none)
    textdomain      -- first "# textdomain:" line (None if there is none)
    '''
    dOut = {}
    text = None
    in_header = True
    header_comments = None
    textdomain = None
    if os.path.exists(tr_file):
        with open(tr_file, "r", encoding='utf-8') as existing_file:
            # save the full text to allow for comparison
            # of the old version with the new output
            text = existing_file.read()
            existing_file.seek(0)
            # a running record of the current comment block
            # we're inside, to allow preceding multi-line comments
            # to be retained for a translation line
            latest_comment_block = None
            for line in existing_file:
                line = line.rstrip('\n')
                if line == comment_unused:
                    # Always delete the 'not used anymore' comment.
                    # It will be re-added to the file if necessary.
                    latest_comment_block = None
                    in_header = False
                    continue
                if line.startswith("#"):
                    # source file comments: ##[ file.lua ]##
                    if line.startswith(symbol_source_prefix) and line.endswith(symbol_source_suffix):
                        # remove those comments; they may be added back automatically
                        continue
                    if line.startswith("# textdomain:"):
                        # Store the first occurrence of textdomain,
                        # discard all subsequent textdomain lines
                        if textdomain is None:
                            textdomain = line
                    elif in_header:
                        # Save header comments (normal comments at top of file)
                        if not header_comments:
                            header_comments = line
                        else:
                            header_comments = header_comments + "\n" + line
                    elif not latest_comment_block:
                        latest_comment_block = line
                    else:
                        latest_comment_block = latest_comment_block + "\n" + line
                    continue

                match = pattern_tr.match(line)
                if match:
                    # this line is a translated line
                    outval = {"translation": match.group(2)}
                    if latest_comment_block:
                        # if there was a comment, record that.
                        outval["comment"] = latest_comment_block
                    latest_comment_block = None
                    # The first translation line always ends the header,
                    # whether or not any header comment was seen.
                    in_header = False
                    dOut[match.group(1)] = outval
    return (dOut, text, header_comments, textdomain)


def sorted_os_walk(folder):
    '''Like os.walk, but return a sorted list of (full path, file name).

    Directories are ordered by their path, the files inside a directory
    case-insensitively by name.
    '''
    paths_and_files = []
    for root, _dirs, files in sorted(os.walk(folder), key=lambda entry: entry[0]):
        for filename in sorted(files, key=str.lower):
            paths_and_files.append((os.path.join(root, filename), filename))
    return paths_and_files


def generate_template(folder, mod_name):
    '''Collect the translatable strings of a mod and write locale/template.txt.

    Walks all *.lua files below ``folder``. Returns a dictionary mapping
    each localized string to the list of its source file comment lines
    (usable with strings_to_text), or None if no string was found; in
    that case no template is written.
    '''
    found_in = {}
    for fullpath_filename, filename in sorted_os_walk(folder):
        if not fnmatch.fnmatch(filename, "*.lua"):
            continue
        found = read_lua_file_strings(fullpath_filename)
        if params["verbose"]:
            print(f"{fullpath_filename}: {str(len(found))} translatable strings")
        relative_path = os.path.relpath(fullpath_filename, start=folder)
        for s in found:
            found_in.setdefault(s, set()).add(relative_path)

    if not found_in:
        return None

    # Convert each source file set to a sorted list and add the comment
    # symbols. Needed because a set is unordered and might result in
    # unpredictable output orders if a string appears in multiple files.
    # The path itself breaks ties between names differing only in case.
    dOut = {}
    for s, sources in found_in.items():
        ordered = sorted(sources, key=lambda path: (path.lower(), path))
        dOut[s] = [f"{symbol_source_prefix} {path} {symbol_source_suffix}" for path in ordered]

    templ_file = os.path.join(folder, "locale/template.txt")
    write_template(templ_file, dOut, mod_name)
    return dOut


def update_tr_file(dNew, mod_name, tr_file):
    '''Rewrite the translation file ``tr_file`` from the current strings.

    dNew is the data used to generate the template; it has all the
    currently existing localized strings. If the content changes and the
    "old-file" option is set, the previous file is copied to "<name>.old".
    '''
    if params["verbose"]:
        print(f"updating {tr_file}")

    old_entries, old_text, header_comments, textdomain = import_tr_file(tr_file)
    new_text = strings_to_text(dNew, old_entries, mod_name, header_comments, textdomain)

    if old_text and old_text != new_text:
        print(f"{tr_file} has changed.")
        if params["old-file"]:
            shutil.copyfile(tr_file, f"{tr_file}.old")

    with open(tr_file, "w", encoding='utf-8') as new_tr_file:
        new_tr_file.write(new_text)


def update_mod(folder):
    '''Update template.txt and all translation files of the mod in ``folder``.

    Exits with status 1 if the mod name cannot be determined.
    '''
    modname = get_modname(folder)
    if modname is None:
        print(f"Unable to determine the mod name in folder {folder}. Missing 'name' field in mod.conf.", file=_stderr)
        # SystemExit instead of exit(): the latter is only provided by the
        # site module and is not guaranteed to exist.
        raise SystemExit(1)

    print(f"Updating translations for {modname}")
    data = generate_template(folder, modname)
    if data is None:
        print(f"No translatable strings found in {modname}")
        return
    for tr_file in get_existing_tr_files(folder):
        update_tr_file(data, modname, os.path.join(folder, "locale/", tr_file))


def _visible_subfolders(folder):
    '''Return the sorted paths of the non-hidden subdirectories of ``folder``.'''
    with os.scandir(folder) as entries:
        return sorted(
            entry.path for entry in entries
            if entry.is_dir() and not entry.name.startswith('.')
        )


def update_folder(folder):
    '''Update ``folder``, which is either a single mod or a modpack.

    For a modpack (contains modpack.txt or modpack.conf) every non-hidden
    subfolder is updated as a mod.
    '''
    is_modpack = (
        os.path.exists(os.path.join(folder, "modpack.txt"))
        or os.path.exists(os.path.join(folder, "modpack.conf"))
    )
    if is_modpack:
        for subfolder in _visible_subfolders(folder):
            update_mod(subfolder)
    else:
        update_mod(folder)
    print("Done.")


def run_all_subfolders(folder):
    '''Run update_folder on every non-hidden subfolder of ``folder``.'''
    for modfolder in _visible_subfolders(folder):
        update_folder(modfolder)


# Only run when executed as a script, so that importing the module has no
# side effects.
if __name__ == "__main__":
    main()