mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 18:12:02 +00:00
346 lines
11 KiB
Python
346 lines
11 KiB
Python
import concurrent.futures
|
|
import hashlib
|
|
import json
|
|
import logging
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
|
|
import bs4
|
|
import closure
|
|
import cssmin
|
|
import htmlmin
|
|
import jsmin
|
|
|
|
import util
|
|
|
|
|
|
def handle_iframe(iframe, soup):
|
|
allowed_domains = ["https://www.youtube.com/", "https://datalens.yandex/"]
|
|
illegal_domain = True
|
|
iframe_src = iframe.attrs["src"]
|
|
for domain in allowed_domains:
|
|
if iframe_src.startswith(domain):
|
|
illegal_domain = False
|
|
break
|
|
if illegal_domain:
|
|
raise RuntimeError(f"iframe from illegal domain: {iframe_src}")
|
|
wrapper = soup.new_tag("div")
|
|
wrapper.attrs["class"] = ["embed-responsive", "embed-responsive-16by9"]
|
|
iframe.insert_before(wrapper)
|
|
iframe.extract()
|
|
wrapper.insert(0, iframe)
|
|
if "width" in iframe.attrs:
|
|
del iframe.attrs["width"]
|
|
if "height" in iframe.attrs:
|
|
del iframe.attrs["height"]
|
|
iframe.attrs[
|
|
"allow"
|
|
] = "accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture"
|
|
iframe.attrs["class"] = "embed-responsive-item"
|
|
iframe.attrs["frameborder"] = "0"
|
|
iframe.attrs["allowfullscreen"] = "1"
|
|
|
|
|
|
def adjust_markdown_html(content):
|
|
soup = bs4.BeautifulSoup(content, features="html.parser")
|
|
|
|
for a in soup.find_all("a"):
|
|
a_class = a.attrs.get("class")
|
|
a_href = a.attrs.get("href")
|
|
if a_class and "headerlink" in a_class:
|
|
a.string = "\xa0"
|
|
if a_href and a_href.startswith("http"):
|
|
a.attrs["target"] = "_blank"
|
|
|
|
for code in soup.find_all("code"):
|
|
code_class = code.attrs.get("class")
|
|
if code_class:
|
|
code.attrs["class"] = code_class + ["syntax"]
|
|
else:
|
|
code.attrs["class"] = "syntax"
|
|
|
|
for iframe in soup.find_all("iframe"):
|
|
handle_iframe(iframe, soup)
|
|
|
|
for img in soup.find_all("img"):
|
|
if img.attrs.get("alt") == "iframe":
|
|
img.name = "iframe"
|
|
img.string = ""
|
|
handle_iframe(img, soup)
|
|
continue
|
|
img_class = img.attrs.get("class")
|
|
if img_class:
|
|
img.attrs["class"] = img_class + ["img-fluid"]
|
|
else:
|
|
img.attrs["class"] = "img-fluid"
|
|
|
|
for details in soup.find_all("details"):
|
|
for summary in details.find_all("summary"):
|
|
if summary.parent != details:
|
|
summary.extract()
|
|
details.insert(0, summary)
|
|
|
|
for dd in soup.find_all("dd"):
|
|
dd_class = dd.attrs.get("class")
|
|
if dd_class:
|
|
dd.attrs["class"] = dd_class + ["pl-3"]
|
|
else:
|
|
dd.attrs["class"] = "pl-3"
|
|
|
|
for div in soup.find_all("div"):
|
|
div_class = div.attrs.get("class")
|
|
is_admonition = div_class and "admonition" in div.attrs.get("class")
|
|
if is_admonition:
|
|
for a in div.find_all("a"):
|
|
a_class = a.attrs.get("class")
|
|
if a_class:
|
|
a.attrs["class"] = a_class + ["alert-link"]
|
|
else:
|
|
a.attrs["class"] = "alert-link"
|
|
|
|
for p in div.find_all("p"):
|
|
p_class = p.attrs.get("class")
|
|
if is_admonition and p_class and ("admonition-title" in p_class):
|
|
p.attrs["class"] = p_class + [
|
|
"alert-heading",
|
|
"display-4",
|
|
"text-reset",
|
|
"mb-2",
|
|
]
|
|
|
|
if is_admonition:
|
|
div.attrs["role"] = "alert"
|
|
if ("info" in div_class) or ("note" in div_class):
|
|
mode = "alert-primary"
|
|
elif ("attention" in div_class) or ("warning" in div_class):
|
|
mode = "alert-warning"
|
|
elif "important" in div_class:
|
|
mode = "alert-danger"
|
|
elif "tip" in div_class:
|
|
mode = "alert-info"
|
|
else:
|
|
mode = "alert-secondary"
|
|
div.attrs["class"] = div_class + ["alert", "pb-0", "mb-4", mode]
|
|
|
|
return str(soup)
|
|
|
|
|
|
def minify_html(content):
|
|
return htmlmin.minify(content)
|
|
|
|
|
|
def build_website(args):
|
|
logging.info("Building website")
|
|
env = util.init_jinja2_env(args)
|
|
|
|
shutil.copytree(
|
|
args.website_dir,
|
|
args.output_dir,
|
|
ignore=shutil.ignore_patterns(
|
|
"*.md",
|
|
"*.sh",
|
|
"*.css",
|
|
"*.json",
|
|
"js/*.js",
|
|
"build",
|
|
"docs",
|
|
"public",
|
|
"node_modules",
|
|
"src",
|
|
"templates",
|
|
"locale",
|
|
".gitkeep",
|
|
),
|
|
)
|
|
|
|
shutil.copytree(
|
|
os.path.join(args.website_dir, "images"),
|
|
os.path.join(args.output_dir, "docs", "images"),
|
|
)
|
|
|
|
# This file can be requested to check for available ClickHouse releases.
|
|
shutil.copy2(
|
|
os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"),
|
|
os.path.join(args.output_dir, "data", "version_date.tsv"),
|
|
)
|
|
|
|
# This file can be requested to install ClickHouse.
|
|
shutil.copy2(
|
|
os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"),
|
|
os.path.join(args.output_dir, "data", "install.sh"),
|
|
)
|
|
|
|
for root, _, filenames in os.walk(args.output_dir):
|
|
for filename in filenames:
|
|
if filename == "main.html":
|
|
continue
|
|
|
|
path = os.path.join(root, filename)
|
|
if not filename.endswith(".html"):
|
|
continue
|
|
logging.info("Processing %s", path)
|
|
with open(path, "rb") as f:
|
|
content = f.read().decode("utf-8")
|
|
|
|
template = env.from_string(content)
|
|
content = template.render(args.__dict__)
|
|
|
|
with open(path, "wb") as f:
|
|
f.write(content.encode("utf-8"))
|
|
|
|
|
|
def get_css_in(args):
|
|
return [
|
|
f"'{args.website_dir}/css/bootstrap.css'",
|
|
f"'{args.website_dir}/css/docsearch.css'",
|
|
f"'{args.website_dir}/css/base.css'",
|
|
f"'{args.website_dir}/css/blog.css'",
|
|
f"'{args.website_dir}/css/docs.css'",
|
|
f"'{args.website_dir}/css/highlight.css'",
|
|
f"'{args.website_dir}/css/main.css'",
|
|
]
|
|
|
|
|
|
def get_js_in(args):
|
|
return [
|
|
f"'{args.website_dir}/js/jquery.js'",
|
|
f"'{args.website_dir}/js/popper.js'",
|
|
f"'{args.website_dir}/js/bootstrap.js'",
|
|
f"'{args.website_dir}/js/sentry.js'",
|
|
f"'{args.website_dir}/js/base.js'",
|
|
f"'{args.website_dir}/js/index.js'",
|
|
f"'{args.website_dir}/js/docsearch.js'",
|
|
f"'{args.website_dir}/js/docs.js'",
|
|
f"'{args.website_dir}/js/main.js'",
|
|
]
|
|
|
|
|
|
def minify_file(path, css_digest, js_digest):
|
|
if not (path.endswith(".html") or path.endswith(".css")):
|
|
return
|
|
|
|
logging.info("Minifying %s", path)
|
|
with open(path, "rb") as f:
|
|
content = f.read().decode("utf-8")
|
|
if path.endswith(".html"):
|
|
content = minify_html(content)
|
|
content = content.replace("base.css?css_digest", f"base.css?{css_digest}")
|
|
content = content.replace("base.js?js_digest", f"base.js?{js_digest}")
|
|
# TODO: restore cssmin
|
|
# elif path.endswith('.css'):
|
|
# content = cssmin.cssmin(content)
|
|
# TODO: restore jsmin
|
|
# elif path.endswith('.js'):
|
|
# content = jsmin.jsmin(content)
|
|
with open(path, "wb") as f:
|
|
f.write(content.encode("utf-8"))
|
|
|
|
|
|
def minify_website(args):
|
|
css_in = " ".join(get_css_in(args))
|
|
css_out = f"{args.output_dir}/docs/css/base.css"
|
|
os.makedirs(f"{args.output_dir}/docs/css")
|
|
|
|
if args.minify and False: # TODO: return closure
|
|
command = (
|
|
f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' "
|
|
f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}"
|
|
)
|
|
logging.info(css_in)
|
|
logging.info(command)
|
|
output = subprocess.check_output(command, shell=True)
|
|
logging.debug(output)
|
|
|
|
else:
|
|
command = f"cat {css_in}"
|
|
output = subprocess.check_output(command, shell=True)
|
|
with open(css_out, "wb+") as f:
|
|
f.write(output)
|
|
|
|
with open(css_out, "rb") as f:
|
|
css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
|
|
|
|
js_in = " ".join(get_js_in(args))
|
|
js_out = f"{args.output_dir}/docs/js/base.js"
|
|
os.makedirs(f"{args.output_dir}/docs/js")
|
|
|
|
if args.minify and False: # TODO: return closure
|
|
js_in = [js[1:-1] for js in js_in]
|
|
closure_args = [
|
|
"--js",
|
|
*js_in,
|
|
"--js_output_file",
|
|
js_out,
|
|
"--compilation_level",
|
|
"SIMPLE",
|
|
"--dependency_mode",
|
|
"NONE",
|
|
"--third_party",
|
|
"--use_types_for_optimization",
|
|
"--isolation_mode",
|
|
"IIFE",
|
|
]
|
|
logging.info(closure_args)
|
|
if closure.run(*closure_args):
|
|
raise RuntimeError("failed to run closure compiler")
|
|
with open(js_out, "r") as f:
|
|
js_content = jsmin.jsmin(f.read())
|
|
with open(js_out, "w") as f:
|
|
f.write(js_content)
|
|
|
|
else:
|
|
command = f"cat {js_in}"
|
|
output = subprocess.check_output(command, shell=True)
|
|
with open(js_out, "wb+") as f:
|
|
f.write(output)
|
|
|
|
with open(js_out, "rb") as f:
|
|
js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8]
|
|
logging.info(js_digest)
|
|
|
|
if args.minify:
|
|
logging.info("Minifying website")
|
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
|
futures = []
|
|
for root, _, filenames in os.walk(args.output_dir):
|
|
for filename in filenames:
|
|
path = os.path.join(root, filename)
|
|
futures.append(
|
|
executor.submit(minify_file, path, css_digest, js_digest)
|
|
)
|
|
for future in futures:
|
|
exc = future.exception()
|
|
if exc:
|
|
logging.error(exc)
|
|
sys.exit(1)
|
|
|
|
|
|
def process_benchmark_results(args):
|
|
benchmark_root = os.path.join(args.website_dir, "benchmark")
|
|
required_keys = {
|
|
"dbms": ["result"],
|
|
"hardware": ["result", "system", "system_full", "kind"],
|
|
}
|
|
for benchmark_kind in ["dbms", "hardware"]:
|
|
results = []
|
|
results_root = os.path.join(benchmark_root, benchmark_kind, "results")
|
|
for result in sorted(os.listdir(results_root)):
|
|
result_file = os.path.join(results_root, result)
|
|
logging.debug(f"Reading benchmark result from {result_file}")
|
|
with open(result_file, "r") as f:
|
|
result = json.loads(f.read())
|
|
for item in result:
|
|
for required_key in required_keys[benchmark_kind]:
|
|
assert (
|
|
required_key in item
|
|
), f'No "{required_key}" in {result_file}'
|
|
results += result
|
|
results_js = os.path.join(
|
|
args.output_dir, "benchmark", benchmark_kind, "results.js"
|
|
)
|
|
with open(results_js, "w") as f:
|
|
data = json.dumps(results)
|
|
f.write(f"var results = {data};")
|