ClickHouse/docs/tools/website.py
2022-05-09 02:11:58 +02:00

264 lines
8.4 KiB
Python

import hashlib
import json
import logging
import os
import shutil
import subprocess
import bs4
import util
def handle_iframe(iframe, soup):
allowed_domains = ["https://www.youtube.com/"]
illegal_domain = True
iframe_src = iframe.attrs["src"]
for domain in allowed_domains:
if iframe_src.startswith(domain):
illegal_domain = False
break
if illegal_domain:
raise RuntimeError(f"iframe from illegal domain: {iframe_src}")
wrapper = soup.new_tag("div")
wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"]
iframe.insert_before(wrapper)
iframe.extract()
wrapper.insert(0, iframe)
if "width" in iframe.attrs:
del iframe.attrs["width"]
if "height" in iframe.attrs:
del iframe.attrs["height"]
iframe.attrs[
"allow"
] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
iframe.attrs["class"] = "embed-responsive-item"
iframe.attrs["frameborder"] = "0"
iframe.attrs["allowfullscreen"] = "1"
def adjust_markdown_html(content):
soup = bs4.BeautifulSoup(content, features="html.parser")
for a in soup.find_all("a"):
a_class = a.attrs.get("class")
a_href = a.attrs.get("href")
if a_class and "headerlink" in a_class:
a.string = "\xa0"
if a_href and a_href.startswith("http"):
a.attrs["target"] = "_blank"
for code in soup.find_all("code"):
code_class = code.attrs.get("class")
if code_class:
code.attrs["class"] = code_class + ["syntax"]
else:
code.attrs["class"] = "syntax"
for iframe in soup.find_all("iframe"):
handle_iframe(iframe, soup)
for img in soup.find_all("img"):
if img.attrs.get("alt") == "iframe":
img.name = "iframe"
img.string = ""
handle_iframe(img, soup)
continue
img_class = img.attrs.get("class")
if img_class:
img.attrs["class"] = img_class + ["img-fluid"]
else:
img.attrs["class"] = "img-fluid"
for details in soup.find_all("details"):
for summary in details.find_all("summary"):
if summary.parent != details:
summary.extract()
details.insert(0, summary)
for dd in soup.find_all("dd"):
dd_class = dd.attrs.get("class")
if dd_class:
dd.attrs["class"] = dd_class + ["pl-3"]
else:
dd.attrs["class"] = "pl-3"
for div in soup.find_all("div"):
div_class = div.attrs.get("class")
is_admonition = div_class and "admonition" in div.attrs.get("class")
if is_admonition:
for a in div.find_all("a"):
a_class = a.attrs.get("class")
if a_class:
a.attrs["class"] = a_class + ["alert-link"]
else:
a.attrs["class"] = "alert-link"
for p in div.find_all("p"):
p_class = p.attrs.get("class")
if is_admonition and p_class and ("admonition-title" in p_class):
p.attrs["class"] = p_class + [
"alert-heading",
"display-4",
"text-reset",
"mb-2",
]
if is_admonition:
div.attrs["role"] = "alert"
if ("info" in div_class) or ("note" in div_class):
mode = "alert-primary"
elif ("attention" in div_class) or ("warning" in div_class):
mode = "alert-warning"
elif "important" in div_class:
mode = "alert-danger"
elif "tip" in div_class:
mode = "alert-info"
else:
mode = "alert-secondary"
div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode]
return str(soup)
def build_website(args):
logging.info("Building website")
env = util.init_jinja2_env(args)
shutil.copytree(
args.website_dir,
args.output_dir,
ignore=shutil.ignore_patterns(
"*.md",
"*.sh",
"*.css",
"*.json",
"js/*.js",
"build",
"docs",
"public",
"node_modules",
"src",
"templates",
"locale",
".gitkeep",
),
)
shutil.copytree(
os.path.join(args.website_dir, "images"),
os.path.join(args.output_dir, "docs", "images"),
)
# This file can be requested to check for available ClickHouse releases.
shutil.copy2(
os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
os.path.join(args.output_dir, "data", "version_date.tsv"),
)
# This file can be requested to install ClickHouse.
shutil.copy2(
os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
os.path.join(args.output_dir, "data", "install.sh"),
)
for root, _, filenames in os.walk(args.output_dir):
for filename in filenames:
if filename == "main.html":
continue
path = os.path.join(root, filename)
if not filename.endswith(".html"):
continue
logging.info("Processing %s", path)
with open(path, "rb") as f:
content = f.read().decode("utf-8")
template = env.from_string(content)
content = template.render(args.__dict__)
with open(path, "wb") as f:
f.write(content.encode("utf-8"))
def get_css_in(args):
return [
f"'{args.website_dir}/css/bootstrap.css'",
f"'{args.website_dir}/css/docsearch.css'",
f"'{args.website_dir}/css/base.css'",
f"'{args.website_dir}/css/blog.css'",
f"'{args.website_dir}/css/docs.css'",
f"'{args.website_dir}/css/highlight.css'",
f"'{args.website_dir}/css/main.css'",
]
def get_js_in(args):
return [
f"'{args.website_dir}/js/jquery.js'",
f"'{args.website_dir}/js/popper.js'",
f"'{args.website_dir}/js/bootstrap.js'",
f"'{args.website_dir}/js/sentry.js'",
f"'{args.website_dir}/js/base.js'",
f"'{args.website_dir}/js/index.js'",
f"'{args.website_dir}/js/docsearch.js'",
f"'{args.website_dir}/js/docs.js'",
f"'{args.website_dir}/js/main.js'",
]
def minify_website(args):
css_in = " ".join(get_css_in(args))
css_out = f"{args.output_dir}/docs/css/base.css"
os.makedirs(f"{args.output_dir}/docs/css")
command = f"cat {css_in}"
output = subprocess.check_output(command, shell=True)
with open(css_out, "wb+") as f:
f.write(output)
with open(css_out, "rb") as f:
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
js_in = " ".join(get_js_in(args))
js_out = f"{args.output_dir}/docs/js/base.js"
os.makedirs(f"{args.output_dir}/docs/js")
command = f"cat {js_in}"
output = subprocess.check_output(command, shell=True)
with open(js_out, "wb+") as f:
f.write(output)
with open(js_out, "rb") as f:
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
logging.info(js_digest)
def process_benchmark_results(args):
benchmark_root = os.path.join(args.website_dir, "benchmark")
required_keys = {
"dbms": ["result"],
"hardware": ["result", "system", "system_full", "kind"],
"versions": ["version", "system"],
}
for benchmark_kind in ["dbms", "hardware", "versions"]:
results = []
results_root = os.path.join(benchmark_root, benchmark_kind, "results")
for result in sorted(os.listdir(results_root)):
result_file = os.path.join(results_root, result)
logging.info(f"Reading benchmark result from {result_file}")
with open(result_file, "r") as f:
result = json.loads(f.read())
for item in result:
for required_key in required_keys[benchmark_kind]:
assert (
required_key in item
), f'No "{required_key}" in {result_file}'
results += result
results_js = os.path.join(
args.output_dir, "benchmark", benchmark_kind, "results.js"
)
with open(results_js, "w") as f:
data = json.dumps(results)
f.write(f"var results = {data};")