ClickHouse/docs/tools/website.py

231 lines
7.8 KiB
Python
Raw Normal View History

2020-03-30 08:25:29 +00:00
import hashlib
import json
import logging
import os
import shutil
2020-03-30 08:25:29 +00:00
import subprocess
import bs4
import closure
import cssmin
import htmlmin
2020-02-18 14:19:44 +00:00
import jinja2
import jsmin
2020-03-30 08:25:29 +00:00
import mdx_clickhouse
2020-02-18 14:19:44 +00:00
def adjust_markdown_html(content):
soup = bs4.BeautifulSoup(
content,
features='html.parser'
)
for details in soup.find_all('details'):
for summary in details.find_all('summary'):
if summary.parent != details:
summary.extract()
details.insert(0, summary)
for div in soup.find_all('div'):
div.attrs['role'] = 'alert'
div_class = div.attrs.get('class')
for a in div.find_all('a'):
a_class = a.attrs.get('class')
if a_class:
a.attrs['class'] = a_class + ['alert-link']
else:
a.attrs['class'] = 'alert-link'
for p in div.find_all('p'):
p_class = p.attrs.get('class')
if p_class and ('admonition-title' in p_class):
p.attrs['class'] = p_class + ['alert-heading', 'display-5', 'mb-2']
if div_class and 'admonition' in div.attrs.get('class'):
if ('info' in div_class) or ('note' in div_class):
mode = 'alert-primary'
elif ('attention' in div_class) or ('warning' in div_class):
mode = 'alert-warning'
elif 'important' in div_class:
mode = 'alert-danger'
elif 'tip' in div_class:
mode = 'alert-info'
else:
mode = 'alert-secondary'
div.attrs['class'] = div_class + ['alert', 'lead', 'pb-0', 'mb-4', mode]
return str(soup)
def minify_html(content):
return htmlmin.minify(content,
remove_comments=False,
remove_empty_space=True,
remove_all_empty_space=False,
reduce_empty_attributes=True,
reduce_boolean_attributes=False,
remove_optional_attribute_quotes=True,
convert_charrefs=False,
keep_pre=True)
def build_website(args):
logging.info('Building website')
2020-02-18 14:19:44 +00:00
env = jinja2.Environment(
loader=jinja2.FileSystemLoader([
args.website_dir,
os.path.join(args.docs_dir, '_includes')
]),
2020-03-30 08:25:29 +00:00
extensions=[
'jinja2.ext.i18n',
'jinja2_highlight.HighlightExtension'
]
2020-02-18 14:19:44 +00:00
)
env.extend(jinja2_highlight_cssclass='syntax p-3 my-3')
2020-03-30 08:25:29 +00:00
translations_dir = os.path.join(args.website_dir, 'locale')
env.install_gettext_translations(
mdx_clickhouse.get_translations(translations_dir, 'en'),
newstyle=True
)
2020-02-18 14:19:44 +00:00
shutil.copytree(
args.website_dir,
args.output_dir,
ignore=shutil.ignore_patterns(
'*.md',
'*.sh',
2020-03-30 08:25:29 +00:00
'*.css',
'*.json',
'js/*.js',
'build',
'docs',
'public',
'node_modules',
'templates',
'feathericons',
'locale'
)
)
2020-02-18 14:19:44 +00:00
for root, _, filenames in os.walk(args.output_dir):
for filename in filenames:
if filename == 'main.html':
continue
2020-02-18 14:19:44 +00:00
path = os.path.join(root, filename)
if not filename.endswith('.html'):
2020-02-18 14:19:44 +00:00
continue
logging.info('Processing %s', path)
with open(path, 'rb') as f:
content = f.read().decode('utf-8')
template = env.from_string(content)
content = template.render(args.__dict__)
with open(path, 'wb') as f:
f.write(content.encode('utf-8'))
2020-03-30 11:39:26 +00:00
def get_css_in(args):
return [
f"'{args.website_dir}/css/bootstrap.css'",
f"'{args.website_dir}/css/docsearch.css'",
f"'{args.website_dir}/css/base.css'",
f"'{args.website_dir}/css/docs.css'",
2020-03-30 08:25:29 +00:00
f"'{args.website_dir}/css/highlight.css'"
2020-03-30 11:39:26 +00:00
]
def get_js_in(args):
return [
2020-04-30 08:32:08 +00:00
f"'{args.website_dir}/js/jquery.js'",
2020-03-30 11:39:26 +00:00
f"'{args.website_dir}/js/popper.js'",
f"'{args.website_dir}/js/bootstrap.js'",
f"'{args.website_dir}/js/base.js'",
f"'{args.website_dir}/js/index.js'",
f"'{args.website_dir}/js/docsearch.js'",
f"'{args.website_dir}/js/docs.js'"
]
def minify_website(args):
css_in = ' '.join(get_css_in(args))
2020-03-30 08:25:29 +00:00
css_out = f'{args.output_dir}/css/base.css'
if args.minify:
command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \
f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}"
else:
command = f'cat {css_in} > {css_out}'
logging.info(command)
output = subprocess.check_output(command, shell=True)
logging.debug(output)
with open(css_out, 'rb') as f:
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
2020-03-30 11:39:26 +00:00
js_in = get_js_in(args)
2020-03-30 08:25:29 +00:00
js_out = f'{args.output_dir}/js/base.js'
if args.minify:
js_in = [js[1:-1] for js in js_in]
closure_args = [
'--js', *js_in, '--js_output_file', js_out,
'--compilation_level', 'SIMPLE',
'--dependency_mode', 'NONE',
'--third_party', '--use_types_for_optimization',
2020-03-31 10:35:56 +00:00
'--isolation_mode', 'IIFE'
2020-03-30 08:25:29 +00:00
]
logging.info(closure_args)
if closure.run(*closure_args):
raise RuntimeError('failed to run closure compiler')
with open(js_out, 'r') as f:
js_content = jsmin.jsmin(f.read())
with open(js_out, 'w') as f:
f.write(js_content)
2020-03-30 08:25:29 +00:00
else:
js_in = ' '.join(js_in)
command = f'cat {js_in} > {js_out}'
logging.info(command)
output = subprocess.check_output(command, shell=True)
2020-03-30 08:25:29 +00:00
logging.debug(output)
with open(js_out, 'rb') as f:
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
logging.info(js_digest)
if args.minify:
2020-02-18 14:19:44 +00:00
logging.info('Minifying website')
for root, _, filenames in os.walk(args.output_dir):
for filename in filenames:
path = os.path.join(root, filename)
if not (
filename.endswith('.html') or
2020-03-30 08:25:29 +00:00
filename.endswith('.css')
):
continue
2020-02-18 14:19:44 +00:00
logging.info('Minifying %s', path)
with open(path, 'rb') as f:
content = f.read().decode('utf-8')
2020-02-18 14:20:53 +00:00
if filename.endswith('.html'):
content = minify_html(content)
2020-03-30 08:25:29 +00:00
content = content.replace('base.css?css_digest', f'base.css?{css_digest}')
content = content.replace('base.js?js_digest', f'base.js?{js_digest}')
2020-02-18 14:20:53 +00:00
elif filename.endswith('.css'):
content = cssmin.cssmin(content)
elif filename.endswith('.js'):
content = jsmin.jsmin(content)
with open(path, 'wb') as f:
f.write(content.encode('utf-8'))
def process_benchmark_results(args):
benchmark_root = os.path.join(args.website_dir, 'benchmark')
for benchmark_kind in ['dbms', 'hardware']:
results = []
results_root = os.path.join(benchmark_root, benchmark_kind, 'results')
for result in sorted(os.listdir(results_root)):
result_file = os.path.join(results_root, result)
logging.debug(f'Reading benchmark result from {result_file}')
with open(result_file, 'r') as f:
results += json.loads(f.read())
results_js = os.path.join(args.output_dir, 'benchmark', benchmark_kind, 'results.js')
with open(results_js, 'w') as f:
data = json.dumps(results)
f.write(f'var results = {data};')