ClickHouse/docs/tools/translate/translate.py

99 lines
3.1 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import os
import random
import re
import sys
import time
import urllib.parse
import googletrans
import requests
import yaml
import typograph_ru
# Matches a single `{...}` placeholder. The capture group makes a split on this
# pattern keep the placeholders in its output.
curly_braces_re = re.compile('({[^}]+})')

# Language used when a caller does not pass one explicitly; taken from the
# TARGET_LANGUAGE environment variable, falling back to Russian.
default_target_language = os.environ.get('TARGET_LANGUAGE', 'ru')

# True when the YANDEX environment variable is set (to any value, even empty).
# translate_toc() and translate_po() also force this flag to True at runtime.
is_yandex = 'YANDEX' in os.environ

# Shared googletrans client, used when the Yandex backend is not selected.
translator = googletrans.Translator()
def translate_impl(text, target_language=None):
    """Translate one text fragment from English into ``target_language``.

    The backend is chosen in this order:

    * ``'en'``: the text is returned unchanged.
    * ``'typograph_ru'``: the text is passed through ``typograph_ru.typograph``.
    * module-level ``is_yandex`` is true: the Yandex translate HTTP endpoint is
      queried, but only for ASCII text that contains at least one letter and
      is not entirely upper case; any other text is returned as is.
    * otherwise: the shared googletrans ``translator`` is used.

    Args:
        text: The fragment to translate.
        target_language: Target language code. When falsy, the module-level
            ``default_target_language`` is used.

    Returns:
        The translated (or untouched) text.

    Raises:
        SystemExit: via ``sys.exit(1)`` when the Yandex endpoint answers with
            a ``code`` other than 200; the failure is printed to stderr first.
    """
    target_language = target_language or default_target_language
    if target_language == 'en':
        return text
    if target_language == 'typograph_ru':
        return typograph_ru.typograph(text)
    if not is_yandex:
        # Random sub-second pause before each googletrans request
        # (presumably to avoid being rate limited -- confirm).
        time.sleep(random.random())
        return translator.translate(text, target_language).text

    # Normalize typographic single quotes to a plain apostrophe so that
    # otherwise-ASCII text still passes the isascii() check below. The search
    # strings are spelled as escapes so they cannot be lost to an encoding
    # mishap: replacing the empty string would insert an apostrophe between
    # every character of the text.
    text = text.replace('\u2018', '\'').replace('\u2019', '\'')

    has_alpha = any(char.isalpha() for char in text)
    if not (text.isascii() and has_alpha and not text.isupper()):
        # Nothing translatable: non-ASCII text, no letters, or an all-caps token.
        return text

    text = urllib.parse.quote(text)
    url = f'http://translate.yandex.net/api/v1/tr.json/translate?srv=docs&lang=en-{target_language}&text={text}'
    result = requests.get(url).json()
    if result.get('code') == 200:
        return result['text'][0]

    result = str(result)
    print(f'Failed to translate "{text}": {result}', file=sys.stderr)
    sys.exit(1)
def translate(text, target_language=None):
    """Translate ``text`` while leaving ``{...}`` placeholders untouched.

    The text is split on the module-level ``curly_braces_re`` pattern. Pieces
    that are a curly-brace placeholder are copied verbatim; every other
    non-empty piece is passed to ``translate_impl``.

    Args:
        text: The string to translate; may contain ``{...}`` placeholders.
        target_language: Target language code, forwarded to ``translate_impl``.

    Returns:
        The pieces joined back together in their original order.
    """
    pieces = []
    for part in curly_braces_re.split(text):
        if not part:
            # Splitting on a capturing pattern yields empty strings when a
            # placeholder sits at the start/end of the text or next to another
            # placeholder. They contribute nothing to the result, so do not
            # spend a backend request (and its delay) on them.
            continue
        if part.startswith('{') and part.endswith('}'):
            pieces.append(part)
        else:
            pieces.append(translate_impl(part, target_language=target_language))
    return ''.join(pieces)
2020-03-30 12:48:55 +00:00
def translate_toc(root, lang):
    """Recursively translate the keys of a nested table-of-contents structure.

    Dict keys are translated with ``translate`` unless the key is the literal
    ``'hidden'`` or is entirely upper case. Dict values and list items are
    processed recursively. Every other node (strings, but also ``None``,
    numbers, booleans, ...) is returned unchanged rather than being dropped.

    Side effect: sets the module-level ``is_yandex`` flag to ``True``.

    Args:
        root: A dict, list or scalar node of the structure.
        lang: Target language code passed to ``translate``.

    Returns:
        A new structure of the same shape with translated dict keys.
    """
    global is_yandex
    is_yandex = True

    if isinstance(root, dict):
        translated = {}
        for key, value in root.items():
            if key != 'hidden' and not key.isupper():
                key = translate(key, lang)
            translated[key] = translate_toc(value, lang)
        return translated

    if isinstance(root, list):
        return [translate_toc(item, lang) for item in root]

    # Leaf node: keep it verbatim whatever its type, so non-string scalars
    # are not silently turned into None.
    return root
2020-03-30 08:25:29 +00:00
def translate_po():
    """Fill in missing translations in the website's ``messages.po`` catalogs.

    For each language in the hard-coded list, the catalog at
    ``<this file's dir>/../../../website/locale/<lang>/LC_MESSAGES/messages.po``
    is read, every message whose ``string`` is empty gets
    ``translate(item.id, lang)`` assigned to it, and the catalog is written
    back to the same path.

    Side effect: sets the module-level ``is_yandex`` flag to ``True`` as soon
    as the first untranslated message is encountered.
    """
    import babel.messages.pofile

    global is_yandex

    base_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'website', 'locale')
    for lang in ['en', 'zh', 'es', 'fr', 'ru', 'ja', 'tr', 'fa']:
        po_path = os.path.join(base_dir, lang, 'LC_MESSAGES', 'messages.po')

        # Decode explicitly as UTF-8 instead of relying on the platform's
        # locale encoding, which cannot represent all of these languages on
        # some systems. NOTE(review): assumes the catalogs are stored as
        # UTF-8 -- confirm against the files' Content-Type header.
        with open(po_path, 'r', encoding='utf-8') as f:
            po_file = babel.messages.pofile.read_po(f, locale=lang, domain='messages')

        for item in po_file:
            if not item.string:
                is_yandex = True
                item.string = translate(item.id, lang)

        # write_po is given a binary file object here, as in the original code.
        with open(po_path, 'wb') as f:
            babel.messages.pofile.write_po(f, po_file)
if __name__ == '__main__':
    # Usage: the first command-line argument is either the literal 'po'
    # (update the .po catalogs) or a target language code, in which case a
    # YAML document with a top-level 'nav' key is read from stdin and its
    # translated form is printed to stdout.
    target_language = sys.argv[1]
    if target_language != 'po':
        source_yaml = sys.stdin.read()
        result = translate_toc(yaml.full_load(source_yaml)['nav'], target_language)
        print(yaml.dump({'nav': result}))
    else:
        translate_po()