ClickHouse/docs/tools/translate/translate.py
Ivan Blinkov f315e5079b
More complete "es" translation (#9791)
* replace exit with assert in test_single_page

* improve save_raw_single_page docs option

* More grammar fixes

* "Built from" link in new tab

* fix mistype

* Example of include in docs

* add anchor to meeting form

* Draft of translation helper

* WIP on translation helper

* Replace some fa docs content with machine translation

* add normalize-en-markdown.sh

* normalize some en markdown

* normalize some en markdown

* admonition support

* normalize

* normalize

* normalize

* support wide tables

* normalize

* normalize

* normalize

* normalize

* normalize

* normalize

* normalize

* normalize

* normalize

* normalize

* normalize

* normalize

* normalize

* lightly edited machine translation of introspection.md

* lightly edited machine translation of lazy.md

* WIP on translation utils

* Normalize ru docs

* Normalize other languages

* some fixes

* WIP on normalize/translate tools

* add requirements.txt

* [experimental] add es docs language as machine translated draft

* remove duplicate script

* Back to wider tab-stop (narrow renders not so well)

* Links to nowhere check at least for English

* use f string

* More complete es translation
2020-03-21 12:17:06 +03:00

64 lines
1.8 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import os
import random
import sys
import time
import urllib.parse
import googletrans
import requests
import yaml
import typograph_ru
# Shared googletrans client; translate() falls back to it when no other
# backend applies.
translator = googletrans.Translator()

# Target language code, overridable through the TARGET_LANGUAGE environment
# variable ('ru' when it is not set).
target_language = os.getenv('TARGET_LANGUAGE', 'ru')

# The Yandex backend is selected whenever YANDEX is present in the
# environment, whatever its value (even an empty string).
is_yandex = 'YANDEX' in os.environ
def translate(text):
    """Translate ``text`` from English into the module-level ``target_language``.

    The backend is chosen in this order:

    * ``target_language == 'en'``: ``text`` is returned unchanged.
    * ``target_language == 'typograph_ru'``: ``text`` is passed through
      ``typograph_ru.typograph``.
    * ``is_yandex`` is true: typographic single quotes are replaced with
      ASCII apostrophes; the text is then sent to the Yandex translate
      endpoint only if it is pure ASCII and not all upper-case, otherwise
      it is returned with just the quote replacement applied.
    * otherwise: the shared googletrans ``translator`` is used after a
      random pause of less than one second.

    Args:
        text: The string to translate.

    Returns:
        The translated (or passed-through) string.

    Raises:
        SystemExit: With status 1 when the Yandex response ``code`` is not
            200; the response is printed to stderr first.
    """
    if target_language == 'en':
        return text
    if target_language == 'typograph_ru':
        return typograph_ru.typograph(text)
    if not is_yandex:
        # Random sub-second pause before each googletrans call
        # (presumably to avoid hammering the service -- TODO confirm).
        time.sleep(random.random())
        return translator.translate(text, target_language).text

    # Normalize curly single quotes (U+2018 / U+2019) to ASCII apostrophes.
    # They are spelled as escapes on purpose: if the literal characters get
    # stripped, the search string becomes empty and str.replace would insert
    # an apostrophe between every character of the text.
    text = text.replace('\u2018', '\'').replace('\u2019', '\'')

    # Only pure-ASCII, not-all-caps text is sent to the service; everything
    # else is returned as is.
    if not text.isascii() or text.isupper():
        return text

    quoted = urllib.parse.quote(text)
    url = f'http://translate.yandex.net/api/v1/tr.json/translate?srv=docs&lang=en-{target_language}&text={quoted}'
    result = requests.get(url).json()
    if result.get('code') != 200:
        print('Failed to translate', str(result), file=sys.stderr)
        sys.exit(1)
    return result['text'][0]
def translate_toc(root):
    """Recursively translate the dictionary keys of a table-of-contents tree.

    Dicts and lists are walked recursively. String dict keys are passed
    through ``translate``, except the key ``'hidden'`` and keys that are
    entirely upper-case, which are kept verbatim. Leaf values are never
    translated.

    Args:
        root: A node of the parsed structure: a dict, a list, or a scalar.

    Returns:
        A new structure of the same shape with translated keys. Scalars
        (strings, numbers, booleans, ``None``) are returned unchanged;
        ``root`` itself is not modified.
    """
    if isinstance(root, dict):
        translated = {}
        for key, value in root.items():
            # Non-string keys have nothing to translate (and no .isupper()),
            # so they are kept as they are.
            if isinstance(key, str) and key != 'hidden' and not key.isupper():
                key = translate(key)
            translated[key] = translate_toc(value)
        return translated
    if isinstance(root, list):
        return [translate_toc(item) for item in root]
    # Strings and every other scalar pass through untouched, so numeric,
    # boolean or null leaves are not silently replaced by None.
    return root
if __name__ == '__main__':
    # Script mode: the Yandex backend is always used and the target language
    # is taken from the first command-line argument.
    is_yandex = True
    target_language = sys.argv[1]

    # Read a YAML document from stdin and translate its 'nav' tree.
    # NOTE(review): yaml.full_load is applied to whatever arrives on stdin --
    # confirm that this input is always trusted.
    config = yaml.full_load(sys.stdin.read())
    result = translate_toc(config['nav'])

    # Emit the translated tree as YAML under the same 'nav' key.
    rendered = yaml.dump({'nav': result})
    print(rendered)