Move i18n scripts under scripts/ (#4044)

* Move i18n scripts under scripts/
Sam El-Husseini authored 2020-07-14 12:07:56 -07:00, committed by GitHub
parent c0a0745f85
commit 55e6f8000e
9 changed files with 10 additions and 10 deletions


@@ -407,7 +407,7 @@ goog.require('Blockly.requires')
  */
 function buildLangfiles(done) {
   // Run js_to_json.py
-  const jsToJsonCmd = `python ./i18n/js_to_json.py \
+  const jsToJsonCmd = `python ./scripts/i18n/js_to_json.py \
     --input_file ${path.join('msg', 'messages.js')} \
     --output_dir ${path.join('msg', 'json')} \
     --quiet`;
@@ -418,7 +418,7 @@ function buildLangfiles(done) {
   json_files = json_files.filter(file => file.endsWith('json') &&
       !(new RegExp(/(keys|synonyms|qqq|constants)\.json$/).test(file)));
   json_files = json_files.map(file => path.join('msg', 'json', file));
-  const createMessagesCmd = `python ./i18n/create_messages.py \
+  const createMessagesCmd = `python ./scripts/i18n/create_messages.py \
     --source_lang_file ${path.join('msg', 'json', 'en.json')} \
     --source_synonym_file ${path.join('msg', 'json', 'synonyms.json')} \
     --source_constants_file ${path.join('msg', 'json', 'constants.json')} \

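Illustrative sketch, not part of the diff: with the template parameters expanded, the first command the updated task shells out to looks roughly like the following, run from the repository root (create_messages.py is invoked the same way with the --source_* flags shown in the second hunk).

import subprocess

# A sketch of the relocated js_to_json.py invocation; paths mirror the gulp
# task above (POSIX-style separators assumed).
subprocess.check_call(
    ['python', './scripts/i18n/js_to_json.py',
     '--input_file', 'msg/messages.js',
     '--output_dir', 'msg/json',
     '--quiet'])
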
scripts/i18n/common.py (new file, 233 lines)

@@ -0,0 +1,233 @@
#!/usr/bin/python
# Code shared by translation conversion scripts.
#
# Copyright 2013 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import codecs
import json
import os
from datetime import datetime


class InputError(Exception):
  """Exception raised for errors in the input.

  Attributes:
      location -- where error occurred
      msg -- explanation of the error
  """

  def __init__(self, location, msg):
    Exception.__init__(self, '{0}: {1}'.format(location, msg))
    self.location = location
    self.msg = msg


def read_json_file(filename):
  """Read a JSON file as UTF-8 into a dictionary, discarding @metadata.

  Args:
      filename: The filename, which must end ".json".

  Returns:
      The dictionary.

  Raises:
      InputError: The filename did not end with ".json" or an error occurred
          while opening or reading the file.
  """
  if not filename.endswith('.json'):
    raise InputError(filename, 'filenames must end with ".json"')
  try:
    # Read in file.
    with codecs.open(filename, 'r', 'utf-8') as infile:
      defs = json.load(infile)
    if '@metadata' in defs:
      del defs['@metadata']
    return defs
  except ValueError as e:
    print('Error reading ' + filename)
    raise InputError(filename, str(e))


def _create_qqq_file(output_dir):
  """Creates a qqq.json file with message documentation for translatewiki.net.

  The file consists of key-value pairs, where the keys are message ids and
  the values are descriptions for the translators of the messages.
  What documentation exists for the format can be found at:
  http://translatewiki.net/wiki/Translating:Localisation_for_developers#Message_documentation

  The file should be closed by _close_qqq_file().

  Parameters:
      output_dir: The output directory.

  Returns:
      A pointer to a file to which a left brace and newline have been written.

  Raises:
      IOError: An error occurred while opening or writing the file.
  """
  qqq_file_name = os.path.join(os.curdir, output_dir, 'qqq.json')
  qqq_file = codecs.open(qqq_file_name, 'w', 'utf-8')
  print('Created file: ' + qqq_file_name)
  qqq_file.write('{\n')
  return qqq_file


def _close_qqq_file(qqq_file):
  """Closes a qqq.json file created and opened by _create_qqq_file().

  This writes the final newlines and right brace.

  Args:
      qqq_file: A file created by _create_qqq_file().

  Raises:
      IOError: An error occurred while writing to or closing the file.
  """
  qqq_file.write('\n}\n')
  qqq_file.close()


def _create_lang_file(author, lang, output_dir):
  """Creates a <lang>.json file for translatewiki.net.

  The file consists of metadata, followed by key-value pairs, where the keys
  are message ids and the values are the messages in the language specified
  by the corresponding command-line argument. The file should be closed by
  _close_lang_file().

  Args:
      author: Name and email address of contact for translators.
      lang: ISO 639-1 source language code.
      output_dir: Relative directory for output files.

  Returns:
      A pointer to a file to which the metadata has been written.

  Raises:
      IOError: An error occurred while opening or writing the file.
  """
  lang_file_name = os.path.join(os.curdir, output_dir, lang + '.json')
  lang_file = codecs.open(lang_file_name, 'w', 'utf-8')
  print('Created file: ' + lang_file_name)
  # string.format doesn't like printing braces, so break up our writes.
  lang_file.write('{\n\t"@metadata": {')
  lang_file.write("""
\t\t"author": "{0}",
\t\t"lastupdated": "{1}",
\t\t"locale": "{2}",
\t\t"messagedocumentation" : "qqq"
""".format(author, str(datetime.now()), lang))
  lang_file.write('\t},\n')
  return lang_file


def _close_lang_file(lang_file):
  """Closes a <lang>.json file created with _create_lang_file().

  This also writes the terminating newline and right brace.

  Args:
      lang_file: A file opened with _create_lang_file().

  Raises:
      IOError: An error occurred while writing to or closing the file.
  """
  lang_file.write('\n}\n')
  lang_file.close()


def _create_key_file(output_dir):
  """Creates a keys.json file mapping Closure keys to Blockly keys.

  Args:
      output_dir: Relative directory for output files.

  Raises:
      IOError: An error occurred while creating the file.
  """
  key_file_name = os.path.join(os.curdir, output_dir, 'keys.json')
  key_file = open(key_file_name, 'w')
  key_file.write('{\n')
  print('Created file: ' + key_file_name)
  return key_file


def _close_key_file(key_file):
  """Closes a key file created and opened with _create_key_file().

  Args:
      key_file: A file created by _create_key_file().

  Raises:
      IOError: An error occurred while writing to or closing the file.
  """
  key_file.write('\n}\n')
  key_file.close()


def write_files(author, lang, output_dir, units, write_key_file):
  """Writes the output files for the given units.

  There are three possible output files:
  * lang_file: JSON file mapping meanings (e.g., Maze.turnLeft) to the
      English text. The base name of the language file is specified by the
      "lang" command-line argument.
  * key_file: JSON file mapping meanings to Soy-generated keys (long hash
      codes). This is only output if the parameter write_key_file is True.
  * qqq_file: JSON file mapping meanings to descriptions.

  Args:
      author: Name and email address of contact for translators.
      lang: ISO 639-1 source language code.
      output_dir: Relative directory for output files.
      units: A list of dictionaries with entries for 'meaning', 'source',
          'description', and 'key' (the last only if write_key_file is true),
          in the order desired in the output files.
      write_key_file: Whether to output a keys.json file.

  Raises:
      IOError: An error occurs opening, writing to, or closing a file.
      KeyError: An expected key is missing from units.
  """
  lang_file = _create_lang_file(author, lang, output_dir)
  qqq_file = _create_qqq_file(output_dir)
  if write_key_file:
    key_file = _create_key_file(output_dir)
  first_entry = True
  for unit in units:
    if not first_entry:
      lang_file.write(',\n')
      if write_key_file:
        key_file.write(',\n')
      qqq_file.write(',\n')
    lang_file.write(u'\t"{0}": "{1}"'.format(
        unit['meaning'],
        unit['source'].replace('"', "'")))
    if write_key_file:
      key_file.write('"{0}": "{1}"'.format(unit['meaning'], unit['key']))
    qqq_file.write(u'\t"{0}": "{1}"'.format(
        unit['meaning'],
        unit['description'].replace('"', "'").replace(
            '{lb}', '{').replace('{rb}', '}')))
    first_entry = False
  _close_lang_file(lang_file)
  if write_key_file:
    _close_key_file(key_file)
  _close_qqq_file(qqq_file)
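
Illustrative sketch, not part of the diff: how a conversion script feeds write_files(). The unit values below are hypothetical, and an existing json/ output directory is assumed.

from common import write_files

# One hypothetical unit: message id, source-language text, and the
# translator-facing description. This writes json/en.json and json/qqq.json;
# json/keys.json is only written when write_key_file is True and each unit
# also carries a 'key' entry.
units = [{
    'meaning': 'Maze.turnLeft',
    'source': 'turn left',
    'description': 'Rotates the player ninety degrees counter-clockwise.',
}]
write_files('Jane Doe <jane@example.com>', 'en', 'json', units, False)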

scripts/i18n/create_messages.py (new executable file, 162 lines)

@@ -0,0 +1,162 @@
#!/usr/bin/python
# Generate .js files defining Blockly core and language messages.
#
# Copyright 2013 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import codecs
import os
import re
import sys

from common import read_json_file

_NEWLINE_PATTERN = re.compile('[\n\r]')


def string_is_ascii(s):
  try:
    # This approach is better for compatibility
    return all(ord(c) < 128 for c in s)
  except TypeError:
    return False


def load_constants(filename):
  """Read in constants file, which must be output in every language."""
  constant_defs = read_json_file(filename)
  constants_text = '\n'
  for key in constant_defs:
    value = constant_defs[key]
    value = value.replace('"', '\\"')
    constants_text += u'\nBlockly.Msg["{0}"] = \"{1}\";'.format(
        key, value)
  return constants_text


def main():
  """Generate .js files defining Blockly core and language messages."""
  # Process command-line arguments.
  parser = argparse.ArgumentParser(description='Convert JSON files to JS.')
  parser.add_argument('--source_lang', default='en',
                      help='ISO 639-1 source language code')
  parser.add_argument('--source_lang_file',
                      default=os.path.join('json', 'en.json'),
                      help='Path to .json file for source language')
  parser.add_argument('--source_synonym_file',
                      default=os.path.join('json', 'synonyms.json'),
                      help='Path to .json file with synonym definitions')
  parser.add_argument('--source_constants_file',
                      default=os.path.join('json', 'constants.json'),
                      help='Path to .json file with constant definitions')
  parser.add_argument('--output_dir', default='js/',
                      help='relative directory for output files')
  parser.add_argument('--key_file', default='keys.json',
                      help='relative path to input keys file')
  parser.add_argument('--quiet', action='store_true', default=False,
                      help='do not write anything to standard output')
  parser.add_argument('files', nargs='+', help='input files')
  args = parser.parse_args()
  if not args.output_dir.endswith(os.path.sep):
    args.output_dir += os.path.sep

  # Read in source language .json file, which provides any values missing
  # in target languages' .json files.
  source_defs = read_json_file(os.path.join(os.curdir, args.source_lang_file))
  # Make sure the source file doesn't contain a newline or carriage return.
  for key, value in source_defs.items():
    if _NEWLINE_PATTERN.search(value):
      print('ERROR: definition of {0} in {1} contained a newline character.'.
            format(key, args.source_lang_file))
      sys.exit(1)
  sorted_keys = sorted(source_defs.keys())

  # Read in synonyms file, which must be output in every language.
  synonym_defs = read_json_file(os.path.join(
      os.curdir, args.source_synonym_file))
  # synonym_defs is also being sorted to ensure the same order is kept
  synonym_text = '\n'.join([u'Blockly.Msg["{0}"] = Blockly.Msg["{1}"];'
      .format(key, synonym_defs[key]) for key in sorted(synonym_defs)])

  # Read in constants file, which must be output in every language.
  constants_text = load_constants(os.path.join(os.curdir, args.source_constants_file))

  # Create each output file.
  for arg_file in args.files:
    (_, filename) = os.path.split(arg_file)
    target_lang = filename[:filename.index('.')]
    if target_lang not in ('qqq', 'keys', 'synonyms', 'constants'):
      target_defs = read_json_file(os.path.join(os.curdir, arg_file))

      # Verify that keys are 'ascii'
      bad_keys = [key for key in target_defs if not string_is_ascii(key)]
      if bad_keys:
        print(u'These keys in {0} contain non ascii characters: {1}'.format(
            filename, ', '.join(bad_keys)))

      # If there's a '\n' or '\r', remove it and print a warning.
      for key, value in target_defs.items():
        if _NEWLINE_PATTERN.search(value):
          print(u'WARNING: definition of {0} in {1} contained '
                'a newline character.'.
                format(key, arg_file))
          target_defs[key] = _NEWLINE_PATTERN.sub(' ', value)

      # Output file.
      outname = os.path.join(os.curdir, args.output_dir, target_lang + '.js')
      with codecs.open(outname, 'w', 'utf-8') as outfile:
        outfile.write(
            """// This file was automatically generated. Do not modify.
'use strict';
""".format(target_lang.replace('-', '.')))
        # For each key in the source language file, output the target value
        # if present; otherwise, output the source language value with a
        # warning comment.
        for key in sorted_keys:
          if key in target_defs:
            value = target_defs[key]
            comment = ''
            del target_defs[key]
          else:
            value = source_defs[key]
            comment = ' // untranslated'
          value = value.replace('"', '\\"')
          outfile.write(u'Blockly.Msg["{0}"] = "{1}";{2}\n'
                        .format(key, value, comment))

        # Announce any keys defined only for target language.
        if target_defs:
          extra_keys = [key for key in target_defs if key not in synonym_defs]
          synonym_keys = [key for key in target_defs if key in synonym_defs]
          if not args.quiet:
            if extra_keys:
              print(u'These extra keys appeared in {0}: {1}'.format(
                  filename, ', '.join(extra_keys)))
            if synonym_keys:
              print(u'These synonym keys appeared in {0}: {1}'.format(
                  filename, ', '.join(synonym_keys)))

        outfile.write(synonym_text)
        outfile.write(constants_text)

      if not args.quiet:
        print('Created {0}.'.format(outname))


if __name__ == '__main__':
  main()
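
Illustrative sketch, not part of the diff: invoking the relocated create_messages.py by hand. The msg/js output directory is an assumption; the filename filter mirrors the one used by the gulp task in the first hunk.

import glob
import subprocess

# Pass every per-language JSON file except the bookkeeping files, exactly as
# the gulp task's filter does.
lang_files = [f for f in sorted(glob.glob('msg/json/*.json'))
              if not f.endswith(('keys.json', 'synonyms.json',
                                 'qqq.json', 'constants.json'))]
subprocess.check_call(
    ['python', './scripts/i18n/create_messages.py',
     '--source_lang_file', 'msg/json/en.json',
     '--source_synonym_file', 'msg/json/synonyms.json',
     '--source_constants_file', 'msg/json/constants.json',
     '--output_dir', 'msg/js'] + lang_files)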

scripts/i18n/dedup_json.py (new executable file, 72 lines)

@@ -0,0 +1,72 @@
#!/usr/bin/python
# Consolidates duplicate key-value pairs in a JSON file.
# If the same key is used with different values, no warning is given,
# and there is no guarantee about which key-value pair will be output.
# There is also no guarantee as to the order of the key-value pairs
# output.
#
# Copyright 2013 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import codecs
import json

from common import InputError


def main():
  """Parses arguments and iterates over files.

  Raises:
      IOError: An I/O error occurred with an input or output file.
      InputError: Input JSON could not be parsed.
  """
  # Set up argument parser.
  parser = argparse.ArgumentParser(
      description='Removes duplicate key-value pairs from JSON files.')
  parser.add_argument('--suffix', default='',
                      help='optional suffix for output files; '
                      'if empty, files will be changed in place')
  parser.add_argument('files', nargs='+', help='input files')
  args = parser.parse_args()

  # Iterate over files.
  for filename in args.files:
    # Read in json using Python libraries. This eliminates duplicates.
    print('Processing ' + filename + '...')
    try:
      with codecs.open(filename, 'r', 'utf-8') as infile:
        j = json.load(infile)
    except ValueError as e:
      print('Error reading ' + filename)
      raise InputError(filename, str(e))

    # Build up the output strings as an array to make delimiter output easier.
    output = []
    for key in j:
      if key != '@metadata':
        output.append('\t"' + key + '": "' +
                      j[key].replace('\n', '\\n') + '"')

    # Output results.
    with codecs.open(filename + args.suffix, 'w', 'utf-8') as outfile:
      outfile.write('{\n')
      outfile.write(',\n'.join(output))
      outfile.write('\n}\n')


if __name__ == '__main__':
  main()
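
Illustrative sketch, not part of the diff: deduplicating a translatewiki export while keeping the original file. The input path is hypothetical.

import subprocess

# Writes msg/json/de.json.deduped with duplicate keys collapsed; omit
# --suffix to rewrite the file in place.
subprocess.check_call(['python', './scripts/i18n/dedup_json.py',
                       '--suffix', '.deduped', 'msg/json/de.json'])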

scripts/i18n/js_to_json.py (new executable file, 135 lines)

@@ -0,0 +1,135 @@
#!/usr/bin/python
# Extracts messages from messages.js into .json files for translation.
#
# Copyright 2013 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Extracts messages from messages.js file into .json files for translation.

Specifically, lines with the following formats are extracted:

    /// Here is a description of the following message.
    Blockly.SOME_KEY = 'Some value';

Adjacent "///" lines are concatenated.

There are two output files, each of which is proper JSON. For each key, the
file en.json would get an entry of the form:

    "Blockly.SOME_KEY", "Some value",

The file qqq.json would get:

    "Blockly.SOME_KEY", "Here is a description of the following message.",

Commas would of course be omitted for the final entry in each file.

@author Ellen Spertus (ellen.spertus@gmail.com)
"""

import argparse
import codecs
import json
import os
import re

from common import write_files

_INPUT_DEF_PATTERN = re.compile("""Blockly.Msg.(\w*)\s*=\s*'(.*)';?\r?$""")

_INPUT_SYN_PATTERN = re.compile(
    """Blockly.Msg.(\w*)\s*=\s*Blockly.Msg.(\w*);""")

_CONSTANT_DESCRIPTION_PATTERN = re.compile(
    """{{Notranslate}}""", re.IGNORECASE)


def main():
  # Set up argument parser.
  parser = argparse.ArgumentParser(description='Create translation files.')
  parser.add_argument(
      '--author',
      default='Ellen Spertus <ellen.spertus@gmail.com>',
      help='name and email address of contact for translators')
  parser.add_argument('--lang', default='en',
                      help='ISO 639-1 source language code')
  parser.add_argument('--output_dir', default='json',
                      help='relative directory for output files')
  parser.add_argument('--input_file', default='messages.js',
                      help='input file')
  parser.add_argument('--quiet', action='store_true', default=False,
                      help='only display warnings, not routine info')
  args = parser.parse_args()
  if not args.output_dir.endswith(os.path.sep):
    args.output_dir += os.path.sep

  # Read and parse input file.
  results = []
  synonyms = {}
  constants = {}  # Values that are constant across all languages.
  description = ''
  infile = codecs.open(args.input_file, 'r', 'utf-8')
  for line in infile:
    if line.startswith('///'):
      if description:
        description = description + ' ' + line[3:].strip()
      else:
        description = line[3:].strip()
    else:
      match = _INPUT_DEF_PATTERN.match(line)
      if match:
        key = match.group(1)
        value = match.group(2).replace("\\'", "'")
        if not description:
          print('Warning: No description for ' + key)
        if description and _CONSTANT_DESCRIPTION_PATTERN.search(description):
          constants[key] = value
        else:
          result = {}
          result['meaning'] = key
          result['source'] = value
          result['description'] = description
          results.append(result)
        description = ''
      else:
        match = _INPUT_SYN_PATTERN.match(line)
        if match:
          if description:
            print('Warning: Description preceding definition of synonym {0}.'.
                  format(match.group(1)))
            description = ''
          synonyms[match.group(1)] = match.group(2)
  infile.close()

  # Create <lang_file>.json, keys.json, and qqq.json.
  write_files(args.author, args.lang, args.output_dir, results, False)

  # Create synonyms.json.
  synonym_file_name = os.path.join(os.curdir, args.output_dir, 'synonyms.json')
  with open(synonym_file_name, 'w') as outfile:
    json.dump(synonyms, outfile)
  if not args.quiet:
    print("Wrote {0} synonym pairs to {1}.".format(
        len(synonyms), synonym_file_name))

  # Create constants.json.
  constants_file_name = os.path.join(os.curdir, args.output_dir, 'constants.json')
  with open(constants_file_name, 'w') as outfile:
    json.dump(constants, outfile)
  if not args.quiet:
    print("Wrote {0} constant pairs to {1}.".format(
        len(constants), constants_file_name))


if __name__ == '__main__':
  main()
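
Illustrative sketch, not part of the diff: what the definition pattern above captures. The message key and text here are hypothetical.

import re

# Same pattern as _INPUT_DEF_PATTERN, written as a raw string; group(1)
# becomes the JSON key and group(2) the source text written to en.json,
# while the preceding /// comment feeds qqq.json.
pattern = re.compile(r"Blockly.Msg.(\w*)\s*=\s*'(.*)';?\r?$")
match = pattern.match("Blockly.Msg.TURN_LEFT = 'turn left';")
print(match.group(1))  # TURN_LEFT
print(match.group(2))  # turn left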

scripts/i18n/tests.py (new file, 46 lines)

@@ -0,0 +1,46 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Tests of i18n scripts.
#
# Copyright 2013 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import common
import re
import unittest


class TestSequenceFunctions(unittest.TestCase):

  def test_insert_breaks(self):
    spaces = re.compile(r'\s+|\\n')

    def contains_all_chars(orig, result):
      return re.sub(spaces, '', orig) == re.sub(spaces, '', result)

    sentences = [u'Quay Pegman qua bên trái hoặc bên phải 90 độ.',
                 u'Foo bar baz this is english that is okay bye.',
                 u'If there is a path in the specified direction, \nthen ' +
                 u'do some actions.',
                 u'If there is a path in the specified direction, then do ' +
                 u'the first block of actions. Otherwise, do the second ' +
                 u'block of actions.']
    for sentence in sentences:
      output = common.insert_breaks(sentence, 30, 50)
      self.assertTrue(contains_all_chars(sentence, output),
                      u'Mismatch between:\n{0}\n{1}'.format(
                          re.sub(spaces, '', sentence),
                          re.sub(spaces, '', output)))


if __name__ == '__main__':
  unittest.main()
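
Illustrative sketch, not part of the diff: running the relocated test module from the repository root. It assumes the helper the test exercises (common.insert_breaks) is available in scripts/i18n/common.py.

import subprocess

# Running the file directly puts scripts/i18n first on sys.path, so
# 'import common' resolves to the sibling module in its new location.
subprocess.check_call(['python', 'scripts/i18n/tests.py'])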