#!/usr/bin/env python3
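"""Build the ClickHouse documentation with MkDocs.

For every requested language this script renders the docs site, optionally
produces a concatenated single-page version (and a PDF of it via wkhtmltopdf),
writes redirect pages, and can serve the result with livereload.
"""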

import argparse
import collections
import datetime
import logging
import os
import shutil
import subprocess
import sys
import time

import bs4
import jinja2
import livereload
import markdown.util

from mkdocs import config
from mkdocs import exceptions
from mkdocs.commands import build as mkdocs_build

from concatenate import concatenate

import mdx_clickhouse
import test
import util
import website

class ClickHouseMarkdown(markdown.extensions.Extension):
    class ClickHousePreprocessor(markdown.util.Processor):
        def run(self, lines):
            # Drop lines marked with <!--hide--> so they never reach the rendered docs.
            for line in lines:
                if '<!--hide-->' not in line:
                    yield line

    def extendMarkdown(self, md):
        md.preprocessors.register(self.ClickHousePreprocessor(), 'clickhouse_preprocessor', 31)


markdown.extensions.ClickHouseMarkdown = ClickHouseMarkdown


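# Navigation is derived from the on-disk layout: every docs subdirectory becomes a
# nested nav entry, with titles and ordering taken from the `toc_*` keys in each
# page's front matter (see build_nav_entry below).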
def build_nav_entry(root):
    if root.endswith('images'):
        return None, None, None
    result_items = []
    index_meta, _ = util.read_md_file(os.path.join(root, 'index.md'))
    current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title', 'hidden'))
    for filename in os.listdir(root):
        path = os.path.join(root, filename)
        if os.path.isdir(path):
            prio, title, payload = build_nav_entry(path)
            if title and payload:
                result_items.append((prio, title, payload))
        elif filename.endswith('.md'):
            path = os.path.join(root, filename)
            meta, _ = util.read_md_file(path)
            path = path.split('/', 2)[-1]
            title = meta.get('toc_title', 'hidden')
            prio = meta.get('toc_priority', 9999)
            result_items.append((prio, title, path))
    result_items = sorted(result_items, key=lambda x: (x[0], x[1]))
    result = collections.OrderedDict([(item[1], item[2]) for item in result_items])
    return index_meta.get('toc_priority', 10000), current_title, result


def build_nav(lang, args):
    docs_dir = os.path.join(args.docs_dir, lang)
    _, _, nav = build_nav_entry(docs_dir)
    result = []
    for key, value in nav.items():
        result.append({key: value})
    return result


def build_for_lang(lang, args):
    logging.info(f'Building {lang} docs')
    os.environ['SINGLE_PAGE'] = '0'

    config_path = os.path.join(args.docs_dir, f'toc_{lang}.yml')
    if args.is_stable_release and not os.path.exists(config_path):
        logging.warning(f'Skipping {lang} docs, because {config_path} does not exist')
        return

    try:
        theme_cfg = {
            'name': None,
            'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
            'language': lang,
            'direction': 'rtl' if lang == 'fa' else 'ltr',
            # TODO: cleanup
            'feature': {
                'tabs': False
            },
            'palette': {
                'primary': 'white',
                'accent': 'white'
            },
            'font': False,
            'logo': 'images/logo.svg',
            'favicon': 'assets/images/favicon.ico',
            'static_templates': ['404.html'],
            'extra': {
                'now': int(time.mktime(datetime.datetime.now().timetuple()))  # TODO better way to avoid caching
            }
        }

        # the following list of languages is sorted according to
        # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers
        languages = {
            'en': 'English',
            'zh': '中文',
            'es': 'Español',
            'fr': 'Français',
            'ru': 'Русский',
            'ja': '日本語',
            'fa': 'فارسی'
        }

        site_names = {
            'en': 'ClickHouse %s Documentation',
            'es': 'Documentación de ClickHouse %s',
            'fr': 'Documentation ClickHouse %s',
            'ru': 'Документация ClickHouse %s',
            'zh': 'ClickHouse文档 %s',
            'ja': 'ClickHouseドキュメント %s',
            'fa': 'مستندات %sClickHouse'
        }

        assert len(site_names) == len(languages)

        if args.version_prefix:
            site_dir = os.path.join(args.docs_output_dir, args.version_prefix, lang)
        else:
            site_dir = os.path.join(args.docs_output_dir, lang)

        markdown_extensions = [
            'mdx_clickhouse',
            'admonition',
            'attr_list',
            'codehilite',
            'nl2br',
            'sane_lists',
            'pymdownx.magiclink',
            'pymdownx.superfences',
            'extra',
            {
                'toc': {
                    'permalink': True,
                    'slugify': mdx_clickhouse.slugify
                }
            }
        ]

        plugins = ['macros']
        if args.htmlproofer:
            plugins.append('htmlproofer')

        raw_config = dict(
            site_name=site_names.get(lang, site_names['en']) % args.version_prefix,
            site_url=f'https://clickhouse.tech/docs/{lang}/',
            docs_dir=os.path.join(args.docs_dir, lang),
            site_dir=site_dir,
            strict=not args.version_prefix,
            theme=theme_cfg,
            copyright='©2016–2020 Yandex LLC',
            use_directory_urls=True,
            repo_name='ClickHouse/ClickHouse',
            repo_url='https://github.com/ClickHouse/ClickHouse/',
            edit_uri=f'edit/master/docs/{lang}',
            extra_css=[f'assets/stylesheets/custom.css?{args.rev_short}'],
            markdown_extensions=markdown_extensions,
            plugins=plugins,
            extra={
                'stable_releases': args.stable_releases,
                'version_prefix': args.version_prefix,
                'single_page': False,
                'rev': args.rev,
                'rev_short': args.rev_short,
                'rev_url': args.rev_url,
                'events': args.events,
                'languages': languages
            }
        )

        if os.path.exists(config_path):
            nav = None
            raw_config['config_file'] = config_path
        else:
            nav = build_nav(lang, args)
            raw_config['nav'] = nav

        cfg = config.load_config(**raw_config)

        try:
            mkdocs_build.build(cfg)
        except jinja2.exceptions.TemplateError:
            if not args.version_prefix:
                raise
            # When building versioned docs, retry with the macros plugin disabled.
            mdx_clickhouse.PatchedMacrosPlugin.disabled = True
            mkdocs_build.build(cfg)

        if not args.skip_single_page:
            build_single_page_version(lang, args, nav, cfg)

        mdx_clickhouse.PatchedMacrosPlugin.disabled = False

        logging.info(f'Finished building {lang} docs')

    except exceptions.ConfigurationError as e:
        raise SystemExit('\n' + str(e))


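# The single-page build copies the per-language sources into a temporary directory,
# keeps only the concatenated single.md, and runs MkDocs again; that output is then
# used for the tests and, unless --skip-pdf is given, for the wkhtmltopdf PDF.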
def build_single_page_version(lang, args, nav, cfg):
    logging.info(f'Building single page version for {lang}')
    os.environ['SINGLE_PAGE'] = '1'
    extra = cfg.data['extra']
    extra['single_page'] = True

    with util.autoremoved_file(os.path.join(args.docs_dir, lang, 'single.md')) as single_md:
        concatenate(lang, args.docs_dir, single_md, nav)

        with util.temp_dir() as site_temp:
            with util.temp_dir() as docs_temp:
                docs_src_lang = os.path.join(args.docs_dir, lang)
                docs_temp_lang = os.path.join(docs_temp, lang)
                shutil.copytree(docs_src_lang, docs_temp_lang)
                # Keep only the concatenated single.md in the temporary copy.
                for root, _, filenames in os.walk(docs_temp_lang):
                    for filename in filenames:
                        if filename != 'single.md' and filename.endswith('.md'):
                            os.unlink(os.path.join(root, filename))

                cfg.load_dict({
                    'docs_dir': docs_temp_lang,
                    'site_dir': site_temp,
                    'extra': extra,
                    'nav': [
                        {cfg.data.get('site_name'): 'single.md'}
                    ]
                })

                mkdocs_build.build(cfg)

                if args.version_prefix:
                    single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, args.version_prefix, lang, 'single')
                else:
                    single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')

                if os.path.exists(single_page_output_path):
                    shutil.rmtree(single_page_output_path)

                shutil.copytree(
                    os.path.join(site_temp, 'single'),
                    single_page_output_path
                )

                logging.info(f'Re-building single page for {lang} pdf/test')
                with util.temp_dir() as test_dir:
                    extra['single_page'] = False
                    cfg.load_dict({
                        'docs_dir': docs_temp_lang,
                        'site_dir': test_dir,
                        'extra': extra,
                        'nav': [
                            {cfg.data.get('site_name'): 'single.md'}
                        ]
                    })
                    mkdocs_build.build(cfg)

                    css_in = ' '.join(website.get_css_in(args))
                    js_in = ' '.join(website.get_js_in(args))
                    subprocess.check_call(f'cat {css_in} > {test_dir}/css/base.css', shell=True)
                    subprocess.check_call(f'cat {js_in} > {test_dir}/js/base.js', shell=True)
                    if args.save_raw_single_page:
                        shutil.copytree(test_dir, args.save_raw_single_page)

                    if not args.version_prefix:  # maybe enable in future
                        logging.info(f'Running tests for {lang}')
                        test.test_single_page(
                            os.path.join(test_dir, 'single', 'index.html'), lang)

                    if not args.skip_pdf:
                        single_page_index_html = os.path.join(test_dir, 'single', 'index.html')
                        single_page_pdf = os.path.abspath(
                            os.path.join(single_page_output_path, f'clickhouse_{lang}.pdf')
                        )

                        # Rewrite relative asset URLs to file:// paths so wkhtmltopdf can resolve them.
                        with open(single_page_index_html, 'r') as f:
                            soup = bs4.BeautifulSoup(
                                f.read(),
                                features='html.parser'
                            )
                        soup_prefix = f'file://{test_dir}'
                        for img in soup.findAll('img'):
                            if img['src'].startswith('/'):
                                img['src'] = soup_prefix + img['src']
                        for script in soup.findAll('script'):
                            script['src'] = soup_prefix + script['src'].split('?', 1)[0]
                        for link in soup.findAll('link'):
                            link['href'] = soup_prefix + link['href'].split('?', 1)[0]

                        with open(single_page_index_html, 'w') as f:
                            f.write(str(soup))

                        create_pdf_command = [
                            'wkhtmltopdf',
                            '--print-media-type',
                            '--log-level', 'warn',
                            single_page_index_html, single_page_pdf
                        ]

                        logging.info(' '.join(create_pdf_command))
                        subprocess.check_call(' '.join(create_pdf_command), shell=True)

    logging.info(f'Finished building single page version for {lang}')


def write_redirect_html(out_path, to_url):
    out_dir = os.path.dirname(out_path)
    try:
        os.makedirs(out_dir)
    except OSError:
        pass
    with open(out_path, 'w') as f:
        f.write(f'''<!DOCTYPE HTML>
<html lang="en-US">
    <head>
        <meta charset="UTF-8">
        <meta http-equiv="refresh" content="0; url={to_url}">
        <script type="text/javascript">
            window.location.href = "{to_url}"
        </script>
        <title>Page Redirection</title>
    </head>
    <body>
        If you are not redirected automatically, follow this <a href="{to_url}">link</a>.
    </body>
</html>''')


def build_redirect_html(args, from_path, to_path):
    for lang in ['en']:  # TODO: restore args.lang.split(','):
        out_path = os.path.join(args.docs_output_dir, lang, from_path.replace('.md', '/index.html'))
        version_prefix = args.version_prefix + '/' if args.version_prefix else '/'
        target_path = to_path.replace('.md', '/')
        to_url = f'/docs{version_prefix}{lang}/{target_path}'
        to_url = to_url.strip()
        write_redirect_html(out_path, to_url)


def build_redirects(args):
    with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f:
        for line in f:
            from_path, to_path = line.split(' ', 1)
            build_redirect_html(args, from_path, to_path)


def build_docs(args):
    tasks = []
    for lang in args.lang.split(','):
        if lang:
            tasks.append((lang, args,))
    util.run_function_in_parallel(build_for_lang, tasks, threads=False)
    build_redirects(args)


def build(args):
    if os.path.exists(args.output_dir):
        shutil.rmtree(args.output_dir)

    if not args.skip_website:
        website.build_website(args)

    build_docs(args)

    from github import build_releases
    build_releases(args, build_docs)

    if not args.skip_website:
        website.minify_website(args)

    for static_redirect in [
        ('tutorial.html', '/docs/en/getting_started/tutorial/',),
        ('reference_en.html', '/docs/en/single/',),
        ('reference_ru.html', '/docs/ru/single/',),
        ('docs/index.html', '/docs/en/',),
    ]:
        write_redirect_html(
            os.path.join(args.output_dir, static_redirect[0]),
            static_redirect[1]
        )


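# Example invocation (flags are illustrative; see the argument parser below):
#   ./build.py --skip-pdf --livereload 8080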
if __name__ == '__main__':
    os.chdir(os.path.join(os.path.dirname(__file__), '..'))
    website_dir = os.path.join('..', 'website')
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--lang', default='en,es,fr,ru,zh,ja,fa')
    arg_parser.add_argument('--docs-dir', default='.')
    arg_parser.add_argument('--theme-dir', default=website_dir)
    arg_parser.add_argument('--website-dir', default=website_dir)
    arg_parser.add_argument('--output-dir', default='build')
    arg_parser.add_argument('--enable-stable-releases', action='store_true')
    arg_parser.add_argument('--stable-releases-limit', type=int, default='10')
    arg_parser.add_argument('--version-prefix', type=str, default='')
    arg_parser.add_argument('--is-stable-release', action='store_true')
    arg_parser.add_argument('--skip-single-page', action='store_true')
    arg_parser.add_argument('--skip-pdf', action='store_true')
    arg_parser.add_argument('--skip-website', action='store_true')
    arg_parser.add_argument('--minify', action='store_true')
    arg_parser.add_argument('--htmlproofer', action='store_true')
    arg_parser.add_argument('--no-docs-macros', action='store_true')
    arg_parser.add_argument('--save-raw-single-page', type=str)
    arg_parser.add_argument('--livereload', type=int, default='0')
    arg_parser.add_argument('--verbose', action='store_true')

    args = arg_parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        stream=sys.stderr
    )

    logging.getLogger('MARKDOWN').setLevel(logging.INFO)

    args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs')

    from github import choose_latest_releases, get_events
    args.stable_releases = choose_latest_releases(args) if args.enable_stable_releases else []
    args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip()
    args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip()
    args.rev_url = f'https://github.com/ClickHouse/ClickHouse/commit/{args.rev}'
    args.events = get_events(args)

    from build import build  # this script is build.py, so this imports its own build()
    build(args)

    if args.livereload:
        # Re-run this script on changes and serve the output directory.
        new_args = [arg for arg in sys.argv if not arg.startswith('--livereload')]
        new_args = sys.executable + ' ' + ' '.join(new_args)

        server = livereload.Server()
        server.watch(args.docs_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
        server.watch(args.website_dir + '**/*', livereload.shell(new_args, cwd='tools', shell=True))
        server.serve(
            root=args.output_dir,
            host='0.0.0.0',
            port=args.livereload
        )
        sys.exit(0)